| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.8654970760233918, |
| "eval_steps": 10, |
| "global_step": 160, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.11695906432748537, |
| "eval_loss": 0.291916161775589, |
| "eval_runtime": 14.114, |
| "eval_samples_per_second": 53.635, |
| "eval_steps_per_second": 6.731, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.23391812865497075, |
| "grad_norm": 0.9206290389893743, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": 0.3086, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.23391812865497075, |
| "eval_loss": 0.2310570627450943, |
| "eval_runtime": 13.833, |
| "eval_samples_per_second": 54.724, |
| "eval_steps_per_second": 6.868, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.3508771929824561, |
| "eval_loss": 0.2036609798669815, |
| "eval_runtime": 13.8307, |
| "eval_samples_per_second": 54.733, |
| "eval_steps_per_second": 6.869, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.4678362573099415, |
| "grad_norm": 0.6671501778795791, |
| "learning_rate": 9.908063190205739e-06, |
| "loss": 0.2113, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.4678362573099415, |
| "eval_loss": 0.1889752596616745, |
| "eval_runtime": 13.8386, |
| "eval_samples_per_second": 54.702, |
| "eval_steps_per_second": 6.865, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5847953216374269, |
| "eval_loss": 0.1784025877714157, |
| "eval_runtime": 13.7963, |
| "eval_samples_per_second": 54.87, |
| "eval_steps_per_second": 6.886, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7017543859649122, |
| "grad_norm": 0.6133391984537656, |
| "learning_rate": 9.465880538792519e-06, |
| "loss": 0.1774, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.7017543859649122, |
| "eval_loss": 0.17270329594612122, |
| "eval_runtime": 13.8427, |
| "eval_samples_per_second": 54.686, |
| "eval_steps_per_second": 6.863, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8187134502923976, |
| "eval_loss": 0.16722090542316437, |
| "eval_runtime": 13.8528, |
| "eval_samples_per_second": 54.646, |
| "eval_steps_per_second": 6.858, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.935672514619883, |
| "grad_norm": 0.5734119079085649, |
| "learning_rate": 8.689603152981262e-06, |
| "loss": 0.1663, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.935672514619883, |
| "eval_loss": 0.16241328418254852, |
| "eval_runtime": 13.8277, |
| "eval_samples_per_second": 54.745, |
| "eval_steps_per_second": 6.87, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.0467836257309941, |
| "eval_loss": 0.16258972883224487, |
| "eval_runtime": 13.8228, |
| "eval_samples_per_second": 54.764, |
| "eval_steps_per_second": 6.873, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.1637426900584795, |
| "grad_norm": 0.5466652845884847, |
| "learning_rate": 7.637304728380036e-06, |
| "loss": 0.1251, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.1637426900584795, |
| "eval_loss": 0.16274206340312958, |
| "eval_runtime": 13.8628, |
| "eval_samples_per_second": 54.607, |
| "eval_steps_per_second": 6.853, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.280701754385965, |
| "eval_loss": 0.16260136663913727, |
| "eval_runtime": 13.8176, |
| "eval_samples_per_second": 54.785, |
| "eval_steps_per_second": 6.875, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.3976608187134503, |
| "grad_norm": 0.6270786548860793, |
| "learning_rate": 6.387708231181229e-06, |
| "loss": 0.1108, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.3976608187134503, |
| "eval_loss": 0.1583063155412674, |
| "eval_runtime": 13.8218, |
| "eval_samples_per_second": 54.769, |
| "eval_steps_per_second": 6.873, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.5146198830409356, |
| "eval_loss": 0.15549179911613464, |
| "eval_runtime": 13.7868, |
| "eval_samples_per_second": 54.907, |
| "eval_steps_per_second": 6.891, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.631578947368421, |
| "grad_norm": 0.5175376687666863, |
| "learning_rate": 5.034296594080849e-06, |
| "loss": 0.1079, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.631578947368421, |
| "eval_loss": 0.15371112525463104, |
| "eval_runtime": 13.8138, |
| "eval_samples_per_second": 54.8, |
| "eval_steps_per_second": 6.877, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.7485380116959064, |
| "eval_loss": 0.15189678966999054, |
| "eval_runtime": 13.8351, |
| "eval_samples_per_second": 54.716, |
| "eval_steps_per_second": 6.867, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.8654970760233918, |
| "grad_norm": 0.5236246557796638, |
| "learning_rate": 3.6783192117952427e-06, |
| "loss": 0.1057, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.8654970760233918, |
| "eval_loss": 0.15082819759845734, |
| "eval_runtime": 13.8117, |
| "eval_samples_per_second": 54.808, |
| "eval_steps_per_second": 6.878, |
| "step": 160 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 255, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 20, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 40369975721984.0, |
| "train_batch_size": 5, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|