diff --git a/agent/nodes/robertaMetrics.ts b/agent/nodes/robertaMetrics.ts index f6851ea..2763a39 100644 --- a/agent/nodes/robertaMetrics.ts +++ b/agent/nodes/robertaMetrics.ts @@ -9,8 +9,8 @@ export const robertaMetrics: GraphNode = async (state) => const result = await evaluateWithRoberta({answer}) let score = 0; - if (result.validProb > result.invalidProb) { - score = 0.7 + ((result.validProb - result.invalidProb)*0.3); + if (result.validProb > (result.invalidProb+0.4)) { + score = 0.7 + ((result.validProb - (result.invalidProb+0.4))*0.3); } return { diff --git a/supporting/RAGAS_Service/.gitignore b/supporting/RAGAS_Service/.gitignore index b9df52e..0098cc5 100644 --- a/supporting/RAGAS_Service/.gitignore +++ b/supporting/RAGAS_Service/.gitignore @@ -2,6 +2,7 @@ results/ roberta_classifier/ roberta_classifier*/ +output* # -- THEIRS -- # Byte-compiled / optimized / DLL files diff --git a/supporting/RAGAS_Service/train_roberta.py b/supporting/RAGAS_Service/train_roberta.py index b338e0b..b0d5ff8 100644 --- a/supporting/RAGAS_Service/train_roberta.py +++ b/supporting/RAGAS_Service/train_roberta.py @@ -120,7 +120,7 @@ def main(): print("Current device:", torch.cuda.current_device() if torch.cuda.is_available() else "CPU") texts, labels = load_dataset_from_csv("../../data/classify.csv") - tokenizer = RobertaTokenizer.from_pretrained(model_name) + tokenizer = RobertaTokenizer.from_pretrained(model_name, hidden_dropout_prob=0.2,attention_probs_dropout_prob=0.2) model = RobertaForSequenceClassification.from_pretrained( model_name, num_labels=NUM_CLASSES @@ -129,13 +129,13 @@ def main(): for param in model.roberta.parameters(): param.requires_grad = False - for param in model.roberta.encoder.layer[-3:].parameters(): + for param in model.roberta.encoder.layer[-6:].parameters(): param.requires_grad = True print("Dataset size:", len(texts)) print("Label distribution:") print(Counter(labels)) - + train_texts, val_texts, train_labels, val_labels = train_test_split( texts, labels, @@ -185,9 +185,9 @@ def main(): training_args = TrainingArguments( output_dir="./results", - learning_rate=1e-5, - per_device_train_batch_size=8, - num_train_epochs=15, + learning_rate=2e-5, + per_device_train_batch_size=32, + num_train_epochs=5, weight_decay=0.01, load_best_model_at_end=True, eval_strategy="epoch", diff --git a/supporting/scorer/views/stats.py b/supporting/scorer/views/stats.py index a2a2585..5c623d2 100644 --- a/supporting/scorer/views/stats.py +++ b/supporting/scorer/views/stats.py @@ -118,8 +118,14 @@ def render(): total = sum(confidence_counter.values()) correct = confidence_counter["Correct-PERFECT"] + confidence_counter["Correct-FINE"] + confidence_counter["Correct-FALSE"] + goodkept = confidence_counter["Correct-PERFECT"] + confidence_counter["Correct-FINE"] + allkept = confidence_counter["Correct-PERFECT"] + confidence_counter["Correct-FINE"] + confidence_counter["Over-confident"] + + corr_percent = (correct / total) * 100 + kept_percent = (goodkept / allkept) * 100 st.markdown(f"**Correct: {corr_percent:.2f}% ({correct}/{total})**") + st.markdown(f"**Kept: {kept_percent:.2f}% ({goodkept}/{allkept})**") st.markdown(f"Duplicates: {dup_counter}") st.pyplot(fig, width=500)