{ "best_metric": 0.678714859437751, "best_model_checkpoint": "./nlu_finetuned_models/rte/bert-base-uncased_lr1e-05/checkpoint-846", "epoch": 10.0, "eval_steps": 500, "global_step": 1410, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5140562248995983, "eval_loss": 0.6864989995956421, "eval_runtime": 0.2465, "eval_samples_per_second": 1010.112, "eval_steps_per_second": 64.907, "step": 141 }, { "epoch": 2.0, "eval_accuracy": 0.6144578313253012, "eval_loss": 0.6658010482788086, "eval_runtime": 0.2411, "eval_samples_per_second": 1032.975, "eval_steps_per_second": 66.376, "step": 282 }, { "epoch": 3.0, "eval_accuracy": 0.642570281124498, "eval_loss": 0.7156305909156799, "eval_runtime": 0.2406, "eval_samples_per_second": 1035.061, "eval_steps_per_second": 66.51, "step": 423 }, { "epoch": 3.546099290780142, "grad_norm": 11.888811111450195, "learning_rate": 6.867924528301887e-06, "loss": 0.5575, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.6385542168674698, "eval_loss": 0.8045499920845032, "eval_runtime": 0.2541, "eval_samples_per_second": 979.981, "eval_steps_per_second": 62.971, "step": 564 }, { "epoch": 5.0, "eval_accuracy": 0.6465863453815262, "eval_loss": 1.0082933902740479, "eval_runtime": 0.2462, "eval_samples_per_second": 1011.391, "eval_steps_per_second": 64.989, "step": 705 }, { "epoch": 6.0, "eval_accuracy": 0.678714859437751, "eval_loss": 1.0545072555541992, "eval_runtime": 0.2429, "eval_samples_per_second": 1025.251, "eval_steps_per_second": 65.88, "step": 846 }, { "epoch": 7.0, "eval_accuracy": 0.6706827309236948, "eval_loss": 1.2696497440338135, "eval_runtime": 0.2394, "eval_samples_per_second": 1039.968, "eval_steps_per_second": 66.825, "step": 987 }, { "epoch": 7.092198581560283, "grad_norm": 14.514544486999512, "learning_rate": 3.0943396226415094e-06, "loss": 0.1723, "step": 1000 }, { "epoch": 8.0, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.419777512550354, "eval_runtime": 0.2419, "eval_samples_per_second": 1029.393, "eval_steps_per_second": 66.146, "step": 1128 }, { "epoch": 9.0, "eval_accuracy": 0.6546184738955824, "eval_loss": 1.497279405593872, "eval_runtime": 0.2373, "eval_samples_per_second": 1049.212, "eval_steps_per_second": 67.419, "step": 1269 }, { "epoch": 10.0, "eval_accuracy": 0.6586345381526104, "eval_loss": 1.5089062452316284, "eval_runtime": 0.2374, "eval_samples_per_second": 1049.047, "eval_steps_per_second": 67.409, "step": 1410 }, { "epoch": 10.0, "step": 1410, "total_flos": 1449010137410880.0, "train_loss": 0.2757559431360123, "train_runtime": 115.2955, "train_samples_per_second": 194.37, "train_steps_per_second": 12.229 } ], "logging_steps": 500, "max_steps": 1410, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1449010137410880.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }