3 Commits

Author SHA1 Message Date
William Jeynes 00e1596be0 tuned parameters for roberta_distilled? 2026-03-23 15:45:18 +00:00
William Jeynes 070aab6a5c Actually we need to go the other way 2026-03-23 14:03:06 +00:00
William Jeynes bff5423f3d testing code for deberta, need to run on GPU 2026-03-22 16:55:21 +00:00
+34 -15
View File
@@ -1,6 +1,6 @@
from sklearn.utils import compute_class_weight from sklearn.utils import compute_class_weight
from torch.nn import CrossEntropyLoss from torch.nn import CrossEntropyLoss
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments, AutoTokenizer, AutoModelForSequenceClassification
import torch import torch
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
@@ -10,7 +10,7 @@ import csv
import numpy as np import numpy as np
NUM_CLASSES = 3 NUM_CLASSES = 3
model_name = "roberta-base" model_name = "distilbert/distilroberta-base"
LABEL_PRIORITY = [ LABEL_PRIORITY = [
("PERFECT", 0), ("PERFECT", 0),
@@ -29,12 +29,21 @@ class WeightedTrainer(Trainer):
def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
    """Return the class-weighted cross-entropy loss for one batch.

    Runs a forward pass with ``inputs`` and scores the resulting logits
    against ``inputs["labels"]`` using ``self.class_weights`` as the
    per-class weights, instead of relying on any loss the model computes
    itself.

    Args:
        model: Callable invoked as ``model(**inputs)``; its result must
            expose the logits through ``.get("logits")``.
        inputs: Mapping of keyword arguments for the forward pass. The
            ``"labels"`` entry is read (not removed), so it is also
            forwarded to the model.
        return_outputs: When true, return ``(loss, outputs)`` rather than
            just ``loss``.
        **kwargs: Accepted and ignored, so callers may pass extra keyword
            arguments without breaking this override.

    Returns:
        The loss tensor, or a ``(loss, outputs)`` tuple when
        ``return_outputs`` is true.
    """
    # NOTE(review): presumably labels are integer class indices of shape
    # (batch,) and logits are (batch, num_classes) -- confirm against caller.
    labels = inputs.get("labels")
    outputs = model(**inputs)
    logits = outputs.get("logits")

    # The weights must share the logits' device and dtype (e.g. under
    # reduced-precision training), otherwise the loss call raises.
    class_weights = self.class_weights.to(
        device=logits.device, dtype=logits.dtype
    )
    loss = CrossEntropyLoss(weight=class_weights)(logits, labels)

    return (loss, outputs) if return_outputs else loss
def label_to_int(extra_info: str) -> int: def label_to_int(extra_info: str) -> int:
@@ -120,17 +129,23 @@ def main():
print("Current device:", torch.cuda.current_device() if torch.cuda.is_available() else "CPU") print("Current device:", torch.cuda.current_device() if torch.cuda.is_available() else "CPU")
texts, labels = load_dataset_from_csv("../../data/classify.csv") texts, labels = load_dataset_from_csv("../../data/classify.csv")
tokenizer = RobertaTokenizer.from_pretrained(model_name, hidden_dropout_prob=0.2,attention_probs_dropout_prob=0.2) # tokenizer = RobertaTokenizer.from_pretrained(model_name, hidden_dropout_prob=0.2,attention_probs_dropout_prob=0.2)
model = RobertaForSequenceClassification.from_pretrained( # model = RobertaForSequenceClassification.from_pretrained(
# model_name,
# num_labels=NUM_CLASSES
# )
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
model_name, model_name,
num_labels=NUM_CLASSES num_labels=NUM_CLASSES
) )
for param in model.roberta.parameters(): # for param in model.deberta.parameters():
param.requires_grad = False # param.requires_grad = True
for param in model.roberta.encoder.layer[-6:].parameters(): # for param in model.deberta.encoder.layer[-6:].parameters():
param.requires_grad = True # param.requires_grad = True
print("Dataset size:", len(texts)) print("Dataset size:", len(texts))
print("Label distribution:") print("Label distribution:")
@@ -140,7 +155,8 @@ def main():
texts, texts,
labels, labels,
test_size=0.2, test_size=0.2,
random_state=42 random_state=42,
stratify=labels
) )
@@ -173,6 +189,7 @@ def main():
self.labels = labels self.labels = labels
def __getitem__(self, idx): def __getitem__(self, idx):
# print(f"DBG: Loading item {idx}")
item = { item = {
key: torch.tensor(val[idx]) key: torch.tensor(val[idx])
for key, val in self.encodings.items() for key, val in self.encodings.items()
@@ -187,7 +204,8 @@ def main():
output_dir="./results", output_dir="./results",
learning_rate=2e-5, learning_rate=2e-5,
per_device_train_batch_size=32, per_device_train_batch_size=32,
num_train_epochs=5, # gradient_accumulation_steps=2,
num_train_epochs=15,
weight_decay=0.01, weight_decay=0.01,
load_best_model_at_end=True, load_best_model_at_end=True,
eval_strategy="epoch", eval_strategy="epoch",
@@ -195,7 +213,8 @@ def main():
metric_for_best_model="f1", metric_for_best_model="f1",
greater_is_better=True, greater_is_better=True,
dataloader_num_workers=4, dataloader_num_workers=4,
dataloader_pin_memory=True dataloader_pin_memory=True,
# warmup_steps=100,
) )
train_dataset = TextDataset(train_encodings, train_labels) train_dataset = TextDataset(train_encodings, train_labels)
@@ -218,8 +237,8 @@ def main():
for k, v in metrics.items(): for k, v in metrics.items():
print(f"{k}: {v}") print(f"{k}: {v}")
trainer.save_model("./roberta_classifier") trainer.save_model("./roberta_distilled_classifier")
tokenizer.save_pretrained("./roberta_classifier") tokenizer.save_pretrained("./roberta_distilled_classifier")