| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9978118161925602, |
| "eval_steps": 500, |
| "global_step": 171, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005835156819839533, |
| "grad_norm": 3.0967645370477803, |
| "learning_rate": 0.0, |
| "loss": 1.0698, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.011670313639679067, |
| "grad_norm": 3.240964653147952, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 1.0714, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0175054704595186, |
| "grad_norm": 3.286415256381705, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 1.1575, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.023340627279358133, |
| "grad_norm": 4.198907257722359, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 1.359, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.029175784099197667, |
| "grad_norm": 3.4693235279579637, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 0.9649, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0350109409190372, |
| "grad_norm": 3.0568945268705883, |
| "learning_rate": 2.7777777777777783e-06, |
| "loss": 1.0989, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.040846097738876735, |
| "grad_norm": 2.2641958909637157, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.9482, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.046681254558716266, |
| "grad_norm": 2.5090273909864367, |
| "learning_rate": 3.88888888888889e-06, |
| "loss": 1.1167, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0525164113785558, |
| "grad_norm": 1.976815998901389, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 1.0178, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.058351568198395334, |
| "grad_norm": 2.1819711414316108, |
| "learning_rate": 5e-06, |
| "loss": 1.2776, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06418672501823487, |
| "grad_norm": 1.8201699012478207, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 1.1955, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0700218818380744, |
| "grad_norm": 1.5462205611641233, |
| "learning_rate": 6.111111111111112e-06, |
| "loss": 0.9814, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.07585703865791393, |
| "grad_norm": 1.8781521337402638, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.0493, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.08169219547775347, |
| "grad_norm": 1.7561324099023312, |
| "learning_rate": 7.222222222222223e-06, |
| "loss": 0.9224, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.087527352297593, |
| "grad_norm": 1.4418880184516636, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 0.8082, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.09336250911743253, |
| "grad_norm": 1.8346753334483505, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 1.0255, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.09919766593727207, |
| "grad_norm": 1.5851713574171153, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 0.9778, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.1050328227571116, |
| "grad_norm": 1.670718527610444, |
| "learning_rate": 9.444444444444445e-06, |
| "loss": 0.9377, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.11086797957695113, |
| "grad_norm": 1.6015249574772341, |
| "learning_rate": 1e-05, |
| "loss": 0.9287, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.11670313639679067, |
| "grad_norm": 1.6989482004469072, |
| "learning_rate": 9.998945997517957e-06, |
| "loss": 1.152, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12253829321663019, |
| "grad_norm": 1.3452249233397808, |
| "learning_rate": 9.99578443444032e-06, |
| "loss": 0.8807, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.12837345003646974, |
| "grad_norm": 2.1487234818996686, |
| "learning_rate": 9.990516643685222e-06, |
| "loss": 0.7627, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.13420860685630925, |
| "grad_norm": 1.923473946779632, |
| "learning_rate": 9.983144846158472e-06, |
| "loss": 1.2667, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.1400437636761488, |
| "grad_norm": 1.1277306360256565, |
| "learning_rate": 9.973672149817232e-06, |
| "loss": 0.7689, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.14587892049598833, |
| "grad_norm": 1.1475575701592111, |
| "learning_rate": 9.96210254835968e-06, |
| "loss": 0.7796, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.15171407731582787, |
| "grad_norm": 1.423352108857882, |
| "learning_rate": 9.948440919541277e-06, |
| "loss": 1.0162, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.1575492341356674, |
| "grad_norm": 1.3087161085162522, |
| "learning_rate": 9.932693023118299e-06, |
| "loss": 0.9945, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.16338439095550694, |
| "grad_norm": 1.3363607553089747, |
| "learning_rate": 9.91486549841951e-06, |
| "loss": 0.8672, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.16921954777534645, |
| "grad_norm": 1.1887359433001867, |
| "learning_rate": 9.894965861547023e-06, |
| "loss": 0.9, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.175054704595186, |
| "grad_norm": 1.3150047218140335, |
| "learning_rate": 9.873002502207502e-06, |
| "loss": 0.786, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.18088986141502553, |
| "grad_norm": 1.3895338060465705, |
| "learning_rate": 9.848984680175049e-06, |
| "loss": 0.935, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.18672501823486506, |
| "grad_norm": 1.3275940096513228, |
| "learning_rate": 9.822922521387277e-06, |
| "loss": 0.804, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1925601750547046, |
| "grad_norm": 1.0560366942912123, |
| "learning_rate": 9.794827013676206e-06, |
| "loss": 0.7541, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.19839533187454414, |
| "grad_norm": 0.9888394869609451, |
| "learning_rate": 9.764710002135784e-06, |
| "loss": 0.6956, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.20423048869438365, |
| "grad_norm": 1.5276601674154802, |
| "learning_rate": 9.732584184127973e-06, |
| "loss": 1.1283, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.2100656455142232, |
| "grad_norm": 1.0255635415282915, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.7307, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.21590080233406272, |
| "grad_norm": 1.3172712637289348, |
| "learning_rate": 9.66236114702178e-06, |
| "loss": 0.912, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.22173595915390226, |
| "grad_norm": 1.2265556745233968, |
| "learning_rate": 9.62429353402556e-06, |
| "loss": 0.8612, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.2275711159737418, |
| "grad_norm": 1.382293666822052, |
| "learning_rate": 9.584276314284316e-06, |
| "loss": 0.9029, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.23340627279358134, |
| "grad_norm": 1.4564772263747658, |
| "learning_rate": 9.542326359097619e-06, |
| "loss": 0.9388, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.23924142961342085, |
| "grad_norm": 1.0351934922881194, |
| "learning_rate": 9.498461354608228e-06, |
| "loss": 0.7048, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.24507658643326038, |
| "grad_norm": 1.1855152897517165, |
| "learning_rate": 9.452699794345583e-06, |
| "loss": 0.8375, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.25091174325309995, |
| "grad_norm": 0.9858455620030542, |
| "learning_rate": 9.405060971428924e-06, |
| "loss": 0.6869, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.2567469000729395, |
| "grad_norm": 0.9902232882820138, |
| "learning_rate": 9.355564970433288e-06, |
| "loss": 0.7302, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.26258205689277897, |
| "grad_norm": 1.0675035604533485, |
| "learning_rate": 9.30423265892184e-06, |
| "loss": 0.8118, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.2684172137126185, |
| "grad_norm": 1.3867123886210095, |
| "learning_rate": 9.251085678648072e-06, |
| "loss": 1.0246, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.27425237053245805, |
| "grad_norm": 1.5259388197713282, |
| "learning_rate": 9.196146436431635e-06, |
| "loss": 1.1016, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.2800875273522976, |
| "grad_norm": 1.2982224709981671, |
| "learning_rate": 9.13943809471159e-06, |
| "loss": 0.9131, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.2859226841721371, |
| "grad_norm": 1.0192488730773452, |
| "learning_rate": 9.08098456178111e-06, |
| "loss": 0.6878, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.29175784099197666, |
| "grad_norm": 1.3595848189757955, |
| "learning_rate": 9.020810481707709e-06, |
| "loss": 0.9331, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2975929978118162, |
| "grad_norm": 0.8812342597637117, |
| "learning_rate": 8.958941223943292e-06, |
| "loss": 0.6287, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.30342815463165573, |
| "grad_norm": 1.2990640418383663, |
| "learning_rate": 8.895402872628352e-06, |
| "loss": 0.9154, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.30926331145149527, |
| "grad_norm": 1.2083829619551534, |
| "learning_rate": 8.83022221559489e-06, |
| "loss": 0.8133, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.3150984682713348, |
| "grad_norm": 1.1431625625586879, |
| "learning_rate": 8.763426733072624e-06, |
| "loss": 0.7709, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.32093362509117435, |
| "grad_norm": 1.0434131741083061, |
| "learning_rate": 8.695044586103297e-06, |
| "loss": 0.8007, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.3267687819110139, |
| "grad_norm": 1.0904984432710232, |
| "learning_rate": 8.625104604667965e-06, |
| "loss": 0.8123, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.33260393873085337, |
| "grad_norm": 0.920394581392736, |
| "learning_rate": 8.553636275532236e-06, |
| "loss": 0.6425, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.3384390955506929, |
| "grad_norm": 1.1503386459221627, |
| "learning_rate": 8.480669729814635e-06, |
| "loss": 0.7206, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.34427425237053244, |
| "grad_norm": 1.090627383298351, |
| "learning_rate": 8.40623573028327e-06, |
| "loss": 0.7557, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.350109409190372, |
| "grad_norm": 1.2240349403722315, |
| "learning_rate": 8.330365658386252e-06, |
| "loss": 0.896, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3559445660102115, |
| "grad_norm": 1.0765178282667567, |
| "learning_rate": 8.25309150102121e-06, |
| "loss": 0.7681, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.36177972283005105, |
| "grad_norm": 0.9793957317021386, |
| "learning_rate": 8.174445837049614e-06, |
| "loss": 0.7504, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.3676148796498906, |
| "grad_norm": 1.2232761418608094, |
| "learning_rate": 8.094461823561473e-06, |
| "loss": 0.8488, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.37345003646973013, |
| "grad_norm": 1.2088998059050347, |
| "learning_rate": 8.013173181896283e-06, |
| "loss": 0.8523, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.37928519328956967, |
| "grad_norm": 1.2900325123864722, |
| "learning_rate": 7.930614183426074e-06, |
| "loss": 0.8181, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.3851203501094092, |
| "grad_norm": 0.9454791838776316, |
| "learning_rate": 7.846819635106569e-06, |
| "loss": 0.7437, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.39095550692924874, |
| "grad_norm": 0.9144587470068872, |
| "learning_rate": 7.76182486480253e-06, |
| "loss": 0.628, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.3967906637490883, |
| "grad_norm": 1.15724937547755, |
| "learning_rate": 7.675665706393502e-06, |
| "loss": 0.871, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.4026258205689278, |
| "grad_norm": 1.5015191452686714, |
| "learning_rate": 7.588378484666214e-06, |
| "loss": 1.0221, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.4084609773887673, |
| "grad_norm": 1.39998905729372, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.9229, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.41429613420860684, |
| "grad_norm": 0.948492711117696, |
| "learning_rate": 7.4105675128517456e-06, |
| "loss": 0.6874, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.4201312910284464, |
| "grad_norm": 1.258374093991861, |
| "learning_rate": 7.320118728046818e-06, |
| "loss": 0.9105, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.4259664478482859, |
| "grad_norm": 0.9884107530136707, |
| "learning_rate": 7.2286917788826926e-06, |
| "loss": 0.6701, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.43180160466812545, |
| "grad_norm": 1.4165000586156353, |
| "learning_rate": 7.136325211051905e-06, |
| "loss": 1.0041, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.437636761487965, |
| "grad_norm": 1.1602616170268807, |
| "learning_rate": 7.043057966391158e-06, |
| "loss": 0.8137, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.4434719183078045, |
| "grad_norm": 0.8628006157003906, |
| "learning_rate": 6.948929366463397e-06, |
| "loss": 0.6294, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.44930707512764406, |
| "grad_norm": 1.0039942958404424, |
| "learning_rate": 6.8539790959798045e-06, |
| "loss": 0.7109, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.4551422319474836, |
| "grad_norm": 1.3931149797330953, |
| "learning_rate": 6.758247186068684e-06, |
| "loss": 0.9198, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.46097738876732314, |
| "grad_norm": 1.0513701294639524, |
| "learning_rate": 6.6617739973982985e-06, |
| "loss": 0.7459, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.4668125455871627, |
| "grad_norm": 1.028695671279348, |
| "learning_rate": 6.5646002031607726e-06, |
| "loss": 0.746, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4726477024070022, |
| "grad_norm": 0.9959898755507389, |
| "learning_rate": 6.466766771924231e-06, |
| "loss": 0.8316, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.4784828592268417, |
| "grad_norm": 1.0084464738139896, |
| "learning_rate": 6.368314950360416e-06, |
| "loss": 0.6801, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.48431801604668123, |
| "grad_norm": 1.2115089332593143, |
| "learning_rate": 6.269286245855039e-06, |
| "loss": 0.9926, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.49015317286652077, |
| "grad_norm": 1.0623534367007055, |
| "learning_rate": 6.169722409008244e-06, |
| "loss": 0.7846, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4959883296863603, |
| "grad_norm": 0.9618691552575331, |
| "learning_rate": 6.0696654160324875e-06, |
| "loss": 0.7579, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.5018234865061999, |
| "grad_norm": 1.4236190190616003, |
| "learning_rate": 5.9691574510553505e-06, |
| "loss": 0.9145, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.5076586433260394, |
| "grad_norm": 0.9781934808099418, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 0.7268, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.513493800145879, |
| "grad_norm": 1.4459804636005047, |
| "learning_rate": 5.766958274393428e-06, |
| "loss": 0.9873, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.5193289569657185, |
| "grad_norm": 1.402656043092622, |
| "learning_rate": 5.66535231008227e-06, |
| "loss": 0.9982, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.5251641137855579, |
| "grad_norm": 1.0200801826456665, |
| "learning_rate": 5.5634658325766066e-06, |
| "loss": 0.678, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5309992706053975, |
| "grad_norm": 0.9733189988828815, |
| "learning_rate": 5.46134179731651e-06, |
| "loss": 0.7651, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.536834427425237, |
| "grad_norm": 1.4664206961259343, |
| "learning_rate": 5.359023259896638e-06, |
| "loss": 1.1306, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.5426695842450766, |
| "grad_norm": 0.8504168428338762, |
| "learning_rate": 5.2565533579139484e-06, |
| "loss": 0.63, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.5485047410649161, |
| "grad_norm": 0.9453356240059136, |
| "learning_rate": 5.153975292780852e-06, |
| "loss": 0.7414, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5543398978847557, |
| "grad_norm": 1.2873508014079351, |
| "learning_rate": 5.05133231151145e-06, |
| "loss": 0.9491, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.5601750547045952, |
| "grad_norm": 1.120596468774601, |
| "learning_rate": 4.948667688488552e-06, |
| "loss": 0.8434, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5660102115244348, |
| "grad_norm": 0.933260578220278, |
| "learning_rate": 4.846024707219149e-06, |
| "loss": 0.6954, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5718453683442742, |
| "grad_norm": 1.1505604485932606, |
| "learning_rate": 4.7434466420860515e-06, |
| "loss": 0.8766, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.5776805251641138, |
| "grad_norm": 1.3190064743230776, |
| "learning_rate": 4.640976740103363e-06, |
| "loss": 0.9601, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5835156819839533, |
| "grad_norm": 1.1031574587195832, |
| "learning_rate": 4.53865820268349e-06, |
| "loss": 0.8262, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5893508388037928, |
| "grad_norm": 1.2140720848727085, |
| "learning_rate": 4.436534167423395e-06, |
| "loss": 0.8474, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.5951859956236324, |
| "grad_norm": 1.0868094387995848, |
| "learning_rate": 4.334647689917734e-06, |
| "loss": 0.7998, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.6010211524434719, |
| "grad_norm": 1.0979874833235823, |
| "learning_rate": 4.233041725606573e-06, |
| "loss": 0.7538, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.6068563092633115, |
| "grad_norm": 0.8239976607859979, |
| "learning_rate": 4.131759111665349e-06, |
| "loss": 0.6354, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.612691466083151, |
| "grad_norm": 1.2986211049877099, |
| "learning_rate": 4.03084254894465e-06, |
| "loss": 0.8842, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.6185266229029905, |
| "grad_norm": 1.4668023858002492, |
| "learning_rate": 3.930334583967514e-06, |
| "loss": 1.1808, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.62436177972283, |
| "grad_norm": 0.9088094469191689, |
| "learning_rate": 3.8302775909917585e-06, |
| "loss": 0.7396, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.6301969365426696, |
| "grad_norm": 1.1598630921383477, |
| "learning_rate": 3.730713754144961e-06, |
| "loss": 0.8171, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.6360320933625091, |
| "grad_norm": 0.9110392040510746, |
| "learning_rate": 3.6316850496395863e-06, |
| "loss": 0.7386, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.6418672501823487, |
| "grad_norm": 1.4073871245462084, |
| "learning_rate": 3.5332332280757706e-06, |
| "loss": 0.8708, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6477024070021882, |
| "grad_norm": 1.0974400375996476, |
| "learning_rate": 3.4353997968392295e-06, |
| "loss": 0.7883, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.6535375638220278, |
| "grad_norm": 0.9061825638441261, |
| "learning_rate": 3.3382260026017027e-06, |
| "loss": 0.6915, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.6593727206418672, |
| "grad_norm": 1.0771803335915262, |
| "learning_rate": 3.241752813931316e-06, |
| "loss": 0.8647, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6652078774617067, |
| "grad_norm": 0.9568570846689433, |
| "learning_rate": 3.1460209040201967e-06, |
| "loss": 0.743, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.6710430342815463, |
| "grad_norm": 0.8038754456744998, |
| "learning_rate": 3.0510706335366034e-06, |
| "loss": 0.6186, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6768781911013858, |
| "grad_norm": 1.0490487646998612, |
| "learning_rate": 2.956942033608843e-06, |
| "loss": 0.7842, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.6827133479212254, |
| "grad_norm": 1.1528124191592586, |
| "learning_rate": 2.863674788948097e-06, |
| "loss": 0.8293, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.6885485047410649, |
| "grad_norm": 1.1151481901518225, |
| "learning_rate": 2.771308221117309e-06, |
| "loss": 0.8212, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.6943836615609045, |
| "grad_norm": 0.9417128722043704, |
| "learning_rate": 2.6798812719531843e-06, |
| "loss": 0.6844, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.700218818380744, |
| "grad_norm": 1.032191371370444, |
| "learning_rate": 2.5894324871482557e-06, |
| "loss": 0.8145, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7060539752005836, |
| "grad_norm": 0.8894619973494138, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.7, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.711889132020423, |
| "grad_norm": 1.0056726773650362, |
| "learning_rate": 2.411621515333788e-06, |
| "loss": 0.7701, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.7177242888402626, |
| "grad_norm": 1.0869409622185178, |
| "learning_rate": 2.324334293606499e-06, |
| "loss": 0.8878, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.7235594456601021, |
| "grad_norm": 0.9975686046452821, |
| "learning_rate": 2.238175135197471e-06, |
| "loss": 0.7396, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.7293946024799417, |
| "grad_norm": 0.9651532423367729, |
| "learning_rate": 2.1531803648934333e-06, |
| "loss": 0.7326, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7352297592997812, |
| "grad_norm": 1.2022500141898183, |
| "learning_rate": 2.069385816573928e-06, |
| "loss": 0.8903, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.7410649161196207, |
| "grad_norm": 1.1194585298700934, |
| "learning_rate": 1.9868268181037186e-06, |
| "loss": 0.7492, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.7469000729394603, |
| "grad_norm": 0.9563972874909548, |
| "learning_rate": 1.9055381764385272e-06, |
| "loss": 0.7019, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.7527352297592997, |
| "grad_norm": 1.1116002934004348, |
| "learning_rate": 1.8255541629503865e-06, |
| "loss": 0.8464, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.7585703865791393, |
| "grad_norm": 0.9834353364914193, |
| "learning_rate": 1.746908498978791e-06, |
| "loss": 0.7098, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7644055433989788, |
| "grad_norm": 0.9133138576459355, |
| "learning_rate": 1.6696343416137495e-06, |
| "loss": 0.7116, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.7702407002188184, |
| "grad_norm": 1.1533333715544398, |
| "learning_rate": 1.5937642697167288e-06, |
| "loss": 0.8582, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7760758570386579, |
| "grad_norm": 1.1643056283458053, |
| "learning_rate": 1.5193302701853674e-06, |
| "loss": 0.8634, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.7819110138584975, |
| "grad_norm": 1.2470889663194495, |
| "learning_rate": 1.4463637244677648e-06, |
| "loss": 1.0618, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.787746170678337, |
| "grad_norm": 1.137971799681056, |
| "learning_rate": 1.374895395332037e-06, |
| "loss": 0.7659, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.7935813274981766, |
| "grad_norm": 0.8296991248159793, |
| "learning_rate": 1.3049554138967052e-06, |
| "loss": 0.6202, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.799416484318016, |
| "grad_norm": 1.267967690991332, |
| "learning_rate": 1.2365732669273778e-06, |
| "loss": 0.7476, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.8052516411378556, |
| "grad_norm": 1.0879072186919871, |
| "learning_rate": 1.1697777844051105e-06, |
| "loss": 0.7832, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.8110867979576951, |
| "grad_norm": 0.9784037046492985, |
| "learning_rate": 1.1045971273716476e-06, |
| "loss": 0.75, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.8169219547775346, |
| "grad_norm": 0.969625700866293, |
| "learning_rate": 1.0410587760567104e-06, |
| "loss": 0.8109, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8227571115973742, |
| "grad_norm": 1.0370004026477495, |
| "learning_rate": 9.791895182922911e-07, |
| "loss": 0.7486, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.8285922684172137, |
| "grad_norm": 1.313976521343513, |
| "learning_rate": 9.190154382188921e-07, |
| "loss": 0.9392, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.8344274252370533, |
| "grad_norm": 1.3968351116673179, |
| "learning_rate": 8.605619052884106e-07, |
| "loss": 1.0106, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.8402625820568927, |
| "grad_norm": 0.9927938469465771, |
| "learning_rate": 8.03853563568367e-07, |
| "loss": 0.7784, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.8460977388767323, |
| "grad_norm": 0.9788007150864164, |
| "learning_rate": 7.489143213519301e-07, |
| "loss": 0.8396, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.8519328956965718, |
| "grad_norm": 1.0639499300007744, |
| "learning_rate": 6.957673410781617e-07, |
| "loss": 0.7964, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.8577680525164114, |
| "grad_norm": 1.2358144380534066, |
| "learning_rate": 6.444350295667112e-07, |
| "loss": 0.8458, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.8636032093362509, |
| "grad_norm": 1.1284354446219842, |
| "learning_rate": 5.949390285710777e-07, |
| "loss": 0.8905, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.8694383661560905, |
| "grad_norm": 0.9040214380343765, |
| "learning_rate": 5.473002056544191e-07, |
| "loss": 0.7118, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.87527352297593, |
| "grad_norm": 0.9693967414133844, |
| "learning_rate": 5.015386453917742e-07, |
| "loss": 0.7273, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8811086797957695, |
| "grad_norm": 1.08806034257909, |
| "learning_rate": 4.576736409023813e-07, |
| "loss": 0.762, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.886943836615609, |
| "grad_norm": 1.0602639261817013, |
| "learning_rate": 4.15723685715686e-07, |
| "loss": 0.8823, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.8927789934354485, |
| "grad_norm": 1.015659074679919, |
| "learning_rate": 3.7570646597444196e-07, |
| "loss": 0.8069, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.8986141502552881, |
| "grad_norm": 1.1556479384409875, |
| "learning_rate": 3.3763885297822153e-07, |
| "loss": 0.9008, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.9044493070751276, |
| "grad_norm": 1.1155559090709997, |
| "learning_rate": 3.015368960704584e-07, |
| "loss": 0.7752, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.9102844638949672, |
| "grad_norm": 1.1402114085019883, |
| "learning_rate": 2.6741581587202747e-07, |
| "loss": 0.849, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.9161196207148067, |
| "grad_norm": 1.0419920776286093, |
| "learning_rate": 2.3528999786421758e-07, |
| "loss": 0.8142, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.9219547775346463, |
| "grad_norm": 1.1306470417466037, |
| "learning_rate": 2.0517298632379445e-07, |
| "loss": 0.8408, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.9277899343544858, |
| "grad_norm": 1.125391381754142, |
| "learning_rate": 1.770774786127244e-07, |
| "loss": 0.7793, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.9336250911743253, |
| "grad_norm": 0.8803202289480606, |
| "learning_rate": 1.510153198249531e-07, |
| "loss": 0.7106, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9394602479941648, |
| "grad_norm": 0.9210847874254954, |
| "learning_rate": 1.2699749779249926e-07, |
| "loss": 0.6821, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.9452954048140044, |
| "grad_norm": 1.0075752224421093, |
| "learning_rate": 1.0503413845297739e-07, |
| "loss": 0.8513, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.9511305616338439, |
| "grad_norm": 0.8750161290165441, |
| "learning_rate": 8.513450158049109e-08, |
| "loss": 0.7585, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.9569657184536834, |
| "grad_norm": 1.1096291136415941, |
| "learning_rate": 6.730697688170251e-08, |
| "loss": 0.8467, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.962800875273523, |
| "grad_norm": 0.9247194424342008, |
| "learning_rate": 5.155908045872349e-08, |
| "loss": 0.7033, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.9686360320933625, |
| "grad_norm": 0.9831793236532468, |
| "learning_rate": 3.7897451640321326e-08, |
| "loss": 0.7581, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.9744711889132021, |
| "grad_norm": 0.9263535840213669, |
| "learning_rate": 2.6327850182769065e-08, |
| "loss": 0.697, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.9803063457330415, |
| "grad_norm": 0.9615311264525579, |
| "learning_rate": 1.6855153841527915e-08, |
| "loss": 0.6953, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.9861415025528811, |
| "grad_norm": 0.9734970339085126, |
| "learning_rate": 9.48335631477948e-09, |
| "loss": 0.8321, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.9919766593727206, |
| "grad_norm": 0.8205128623545712, |
| "learning_rate": 4.2155655596809455e-09, |
| "loss": 0.6526, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9978118161925602, |
| "grad_norm": 0.8811350978732005, |
| "learning_rate": 1.054002482043237e-09, |
| "loss": 0.7098, |
| "step": 171 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 171, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 4050, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 18593527627776.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|