diff --git a/agent/nodes/robertaMetrics.ts b/agent/nodes/robertaMetrics.ts
index f6851ea..2763a39 100644
--- a/agent/nodes/robertaMetrics.ts
+++ b/agent/nodes/robertaMetrics.ts
@@ -9,8 +9,8 @@ export const robertaMetrics: GraphNode<typeof MessagesState> = async (state) =>
   const result = await evaluateWithRoberta({answer})
   
   let score = 0;
-  if (result.validProb > result.invalidProb) {
-    score = 0.7 + ((result.validProb - result.invalidProb)*0.3);
+  if (result.validProb > (result.invalidProb+0.4)) {
+    score = 0.7 + ((result.validProb - (result.invalidProb+0.4))*0.3);
   }
   
   return {
diff --git a/supporting/RAGAS_Service/.gitignore b/supporting/RAGAS_Service/.gitignore
index b9df52e..0098cc5 100644
--- a/supporting/RAGAS_Service/.gitignore
+++ b/supporting/RAGAS_Service/.gitignore
@@ -2,6 +2,7 @@
 results/
 roberta_classifier/
 roberta_classifier*/
+output*
 
 # -- THEIRS --
 # Byte-compiled / optimized / DLL files
diff --git a/supporting/RAGAS_Service/train_roberta.py b/supporting/RAGAS_Service/train_roberta.py
index b338e0b..b0d5ff8 100644
--- a/supporting/RAGAS_Service/train_roberta.py
+++ b/supporting/RAGAS_Service/train_roberta.py
@@ -120,7 +120,7 @@ def main():
     print("Current device:", torch.cuda.current_device() if torch.cuda.is_available() else "CPU")
     texts, labels = load_dataset_from_csv("../../data/classify.csv")
 
     tokenizer = RobertaTokenizer.from_pretrained(model_name)
     model = RobertaForSequenceClassification.from_pretrained(
-        model_name,
+        model_name, hidden_dropout_prob=0.2, attention_probs_dropout_prob=0.2,  # dropout is a model-config option; the tokenizer ignores it
         num_labels=NUM_CLASSES
@@ -129,13 +129,13 @@ def main():
     for param in model.roberta.parameters():
         param.requires_grad = False
 
-    for param in model.roberta.encoder.layer[-3:].parameters():
+    for param in model.roberta.encoder.layer[-6:].parameters():
         param.requires_grad = True
 
     print("Dataset size:", len(texts))
     print("Label distribution:")
     print(Counter(labels))
 
     train_texts, val_texts, train_labels, val_labels = train_test_split(
         texts,
         labels,
@@ -185,9 +185,9 @@ def main():
 
     training_args = TrainingArguments(
         output_dir="./results",
-        learning_rate=1e-5,
-        per_device_train_batch_size=8,
-        num_train_epochs=15,
+        learning_rate=2e-5,
+        per_device_train_batch_size=32,
+        num_train_epochs=5,
         weight_decay=0.01,
         load_best_model_at_end=True,
         eval_strategy="epoch",
diff --git a/supporting/scorer/views/stats.py b/supporting/scorer/views/stats.py
index a2a2585..5c623d2 100644
--- a/supporting/scorer/views/stats.py
+++ b/supporting/scorer/views/stats.py
@@ -118,8 +118,14 @@ def render():
             total = sum(confidence_counter.values())
             correct = confidence_counter["Correct-PERFECT"] + confidence_counter["Correct-FINE"] + confidence_counter["Correct-FALSE"]
 
+            goodkept = confidence_counter["Correct-PERFECT"] + confidence_counter["Correct-FINE"]
+            allkept = confidence_counter["Correct-PERFECT"] + confidence_counter["Correct-FINE"] + confidence_counter["Over-confident"]
+
+
             corr_percent = (correct / total) * 100
+            kept_percent = (goodkept / allkept) * 100 if allkept else 0.0  # nothing kept -> report 0% instead of raising ZeroDivisionError
             st.markdown(f"**Correct: {corr_percent:.2f}% ({correct}/{total})**")
+            st.markdown(f"**Kept: {kept_percent:.2f}% ({goodkept}/{allkept})**")
             st.markdown(f"Duplicates: {dup_counter}")
             st.pyplot(fig, width=500)