| { |
| "teacher_model": "gpt2-large", |
| "student_model": "distilgpt2", |
| "tokenizer_model": "gpt2", |
| "batch_size": 4, |
| "max_length": 256, |
| "num_epochs": 3, |
| "learning_rate": 5e-05, |
| "distillation_alpha": 0.7, |
| "student_lm_beta": 0.3, |
| "dataset_file": "/home/divyarani.k/Internship-2025-Team1/Distillation/Model_Exploration/Distillation-Gpt2-DistilGpt/F-Tallyqa.csv", |
| "train_samples": 106414, |
| "val_samples": 11824, |
| "eval_samples": 100, |
| "best_validation_loss": 1.8084159012737713, |
| "evaluation_metrics": { |
| "rouge1_teacher_student_before": 0.3164868777314132, |
| "rouge2_teacher_student_before": 0.07499568529627544, |
| "rougeL_teacher_student_before": 0.18821303864954625, |
| "cosine_similarity_teacher_student_before": 0.36006441712379456, |
| "rouge1_teacher_student_after": 0.3048857745421187, |
| "rouge2_teacher_student_after": 0.06675678541680866, |
| "rougeL_teacher_student_after": 0.18748940481226373, |
| "cosine_similarity_teacher_student_after": 0.4749462604522705, |
| "rouge1_student_before_student_after": 0.2562567617366068, |
| "rouge2_student_before_student_after": 0.0373565907888051, |
| "rougeL_student_before_student_after": 0.14784319087594555, |
| "cosine_similarity_student_before_student_after": 0.33844634890556335, |
| "avg_teacher_time": 0.9765390038490296, |
| "avg_student_before_time": 0.27398345947265623, |
| "avg_student_after_time": 0.29969048976898194 |
| } |
| } |