From 624d45bc5392da83fd1ec703f3b8467d24d8585f Mon Sep 17 00:00:00 2001 From: William Jeynes Date: Tue, 24 Mar 2026 18:29:40 +0000 Subject: [PATCH] Re-allow multithreading on service. Add results table --- agent/nodes/robertaMetrics.ts | 24 ++++++++++---------- supporting/RAGAS_Service/README.md | 13 +++++++++++ supporting/RAGAS_Service/ensemble_service.py | 10 ++++---- supporting/Wrapper/run.ts | 2 +- 4 files changed, 31 insertions(+), 18 deletions(-) create mode 100644 supporting/RAGAS_Service/README.md diff --git a/agent/nodes/robertaMetrics.ts b/agent/nodes/robertaMetrics.ts index 4e34ea2..1be834f 100644 --- a/agent/nodes/robertaMetrics.ts +++ b/agent/nodes/robertaMetrics.ts @@ -16,22 +16,22 @@ export const robertaMetrics: GraphNode = async (state) => const flscore = flresult.validProb - flresult.invalidProb; //Option 1: combining scores - // const score = lrscore * 0.3 + roscore * 0.5 + flscore * 0.3 + const score = lrscore * 0.3 + roscore * 0.5 + flscore * 0.3 //Option 2: majority voting - const rovote = roscore > 0.6 - const flvote = flscore > 0.94 - const lrvote = lrscore > 0.75 + // const rovote = roscore > 0.6 + // const flvote = flscore > 0.94 + // const lrvote = lrscore > 0.75 - let counter = 0 - if (rovote) counter++ - if (flvote) counter++ - if (lrvote) counter++ + // let counter = 0 + // if (rovote) counter++ + // if (flvote) counter++ + // if (lrvote) counter++ - let score = 0 - if (counter >= 2) { - score = 0.7 + lrscore + flscore + lrscore - } + // let score = 0 + // if (counter >= 2) { + // score = 0.7 + lrscore + flscore + lrscore + // } return { messages: [ new AIMessage("ROBERTA:" + score)] diff --git a/supporting/RAGAS_Service/README.md b/supporting/RAGAS_Service/README.md new file mode 100644 index 0000000..841a0fb --- /dev/null +++ b/supporting/RAGAS_Service/README.md @@ -0,0 +1,13 @@ +| Model | % Correct | % Valid taken forward|Used in ensemble|Link 
+|------------------------------------------------------------|-----------|----------------------|----------------|------| +| Original | 53.22 | 61.72 | | | +| Original (RAGAS) | 56.01 | 57.73 | | | +| Roberta (base) | 75 | 70 | | | +| Roberta (Generated Data) | 76 | 71 | | | +| Roberta (Generated Data + Back Translation) | 74 | 71 | | | +| Roberta (Generated Data + Back Translation + Thresholding) | 77 | 90 | Y | [Here](https://huggingface.co/WillJeynes/LLMsForDisinformationAnalysis) | +| Distilled Roberta | 72.73 | 69.57 | | | +| Flan | 79.17 | 85.71 | Y | [Here](https://huggingface.co/WillJeynes/LLMsForDisinformationAnalysis-Flan) | +| Simple Regression Model | 74.77 | 85.71 | Y | [Here](https://huggingface.co/WillJeynes/LLMsForDisinformationAnalysis-Regression) | +| Ensemble Model (weighted confidence score sum) | 84.21 | 83.33 | | | +| Ensemble Model (majority voting) | 80.2 | 95.12 | | | \ No newline at end of file diff --git a/supporting/RAGAS_Service/ensemble_service.py b/supporting/RAGAS_Service/ensemble_service.py index 031ae0c..a5c5b20 100644 --- a/supporting/RAGAS_Service/ensemble_service.py +++ b/supporting/RAGAS_Service/ensemble_service.py @@ -102,6 +102,11 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu") flan_model.to(device) flan_model.eval() +label_token_ids = { + label: flan_tokenizer(label, add_special_tokens=False).input_ids[0] + for label in LABEL_TO_INT.keys() +} + def format_prompt(text: str) -> str: return ( @@ -204,11 +209,6 @@ def evaluate(req: EvalRequest): skip_special_tokens=True ) - label_token_ids = { - label: flan_tokenizer(label, add_special_tokens=False).input_ids[0] - for label in LABEL_TO_INT.keys() - } - label_logits = torch.tensor( [logits[0, tid].item() for tid in label_token_ids.values()] ) diff --git a/supporting/Wrapper/run.ts b/supporting/Wrapper/run.ts index 277bfad..7e362bb 100644 --- a/supporting/Wrapper/run.ts +++ b/supporting/Wrapper/run.ts @@ -17,7 +17,7 @@ const AGENT_NAME = process.env.AGENT ?? "agent"; */ const MODE = process.env.MODE ?? 
"claim"; -const MAX_CONCURRENCY = 1; +const MAX_CONCURRENCY = 5; const client = new Client({ apiUrl: API_URL });