From 00e1596be08281c4f7848e81c8cddb6f1aadd3a5 Mon Sep 17 00:00:00 2001 From: William Jeynes Date: Mon, 23 Mar 2026 15:45:18 +0000 Subject: [PATCH] tuned parameters for roberta_distilled? --- supporting/RAGAS_Service/train_roberta.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/supporting/RAGAS_Service/train_roberta.py b/supporting/RAGAS_Service/train_roberta.py index 7cf23a9..6e79c78 100644 --- a/supporting/RAGAS_Service/train_roberta.py +++ b/supporting/RAGAS_Service/train_roberta.py @@ -35,10 +35,11 @@ class WeightedTrainer(Trainer): logits = outputs.get("logits") # loss_fct = CrossEntropyLoss(weight=self.class_weights.to(logits.device)) - # loss_fct = CrossEntropyLoss( - # weight=self.class_weights.to(logits.device).to(logits.dtype) - # ) - loss_fct = CrossEntropyLoss() + loss_fct = CrossEntropyLoss( + weight=self.class_weights.to(logits.device).to(logits.dtype) + ) + # loss_fct = CrossEntropyLoss() + # print("DBG: Before loss") loss = loss_fct(logits, labels) # loss.backward() @@ -172,14 +173,14 @@ def main(): train_texts, truncation=True, padding=True, - max_length=512 + max_length=256 ) val_encodings = tokenizer( val_texts, truncation=True, padding=True, - max_length=512 + max_length=256 ) class TextDataset(torch.utils.data.Dataset): @@ -202,9 +203,9 @@ def main(): training_args = TrainingArguments( output_dir="./results", learning_rate=2e-5, - per_device_train_batch_size=16, - gradient_accumulation_steps=2, - num_train_epochs=10, + per_device_train_batch_size=32, + # gradient_accumulation_steps=2, + num_train_epochs=15, weight_decay=0.01, load_best_model_at_end=True, eval_strategy="epoch", @@ -236,8 +237,8 @@ def main(): for k, v in metrics.items(): print(f"{k}: {v}") - trainer.save_model("./roberta_classifier") - tokenizer.save_pretrained("./roberta_classifier") + trainer.save_model("./roberta_distilled_classifier") + tokenizer.save_pretrained("./roberta_distilled_classifier")