Improve response extraction

Use a slightly smaller model. Reduce concurrency. Be more clear in the prompts
Allow for configurable ranking server url. Delete old ragas call
2026-04-02 21:02:26 +01:00 · 2026-04-02 20:10:57 +01:00 · 2026-04-02 13:48:15 +01:00 · 2026-04-02 13:18:02 +01:00 · 2026-03-31 19:26:56 +01:00 · 2026-03-31 18:26:55 +01:00
18 changed files with 612 additions and 466 deletions
@@ -4,3 +4,4 @@ LANGSMITH_API_KEY=123456
 LANGSMITH_ENDPOINT=https://eu.api.smith.langchain.com
 SCRAPER_INSTANCE=https://example.com
 SCRAPER_PARAM_ANYTHING=else
+RANKING_URL=http://localhost:8000/evaluate
@@ -1,25 +1,28 @@
-import { HumanMessage, SystemMessage } from "@langchain/core/messages";
+import { SystemMessage } from "@langchain/core/messages";
 import { GraphNode } from "@langchain/langgraph";
 import { MessagesState } from "../state";
-import { ChatOpenAI } from "@langchain/openai"
+import { ChatOllama } from "@langchain/ollama";
 import { hydratePrompt } from "../prompts/hydratePrompt";
+import { logger } from "../utils/logger";

 export function createModelNode(tools: any, promptPath: string): GraphNode<typeof MessagesState> {
    return async (state) => {
        const sysPrompt = await hydratePrompt(promptPath, state);

-        const model = new ChatOpenAI({
-            model: "gpt-5-mini"
+        const model = new ChatOllama({
+            model: "llama3.1:8b-instruct-q4_K_M",
+            temperature: 0.3
        });
+
        const modelWithTools = model.bindTools(Object.values(tools));

        const response = await modelWithTools.invoke([
-            new SystemMessage(
-                sysPrompt
-            ),
+            new SystemMessage(sysPrompt),
            ...state.messages,
        ]);

+        logger.error(response);
+
        return {
            messages: [response]
        };
@@ -3,8 +3,16 @@ import { MessagesState } from "../state";
 import { AIMessage, BaseMessage } from "@langchain/core/messages";
 import { rankExampleTriggerEvents } from "../tools/retreiveExamples";

+function extractTE(text: string) {
+  const match = text.match(/<norm>([\s\S]*?)<\/norm>/);
+  if (!match) throw new Error("Nothing found between <norm> tags");
+  return match[1].trim();
+}
+
+
 export const triggerEventSetup: GraphNode<typeof MessagesState> = async (state) => {
-  let nc = state?.messages?.at(-1)?.content ?? "" //keep a copy of normalized trigger event. Again two things, womp womp
+  let raw = state?.messages?.at(-1)?.content ?? "" //keep a copy of normalized trigger event. Again two things, womp womp
+  let nc = extractTE(raw.toString())

  //Now give in-context examples. hopwfully we can self-teach?
  let similarityResults = await rankExampleTriggerEvents(state.disinformationTitle)
@@ -1,32 +1,60 @@
 import { GraphNode } from "@langchain/langgraph";
 import { MessagesState, ProposedTriggerEventArray } from "../state";
 import { logger } from "../utils/logger";
-import { queryScraper } from "../tools/webSearch";
-import { rankAndDisplayData } from "../tools/triggerEventTools";
+import { jsonrepair } from 'jsonrepair';
+
+function extractJSON(text: string) {
+  const match = text.match(/<json>([\s\S]*?)<\/json>/);
+  if (!match) throw new Error("No JSON found between <json> tags");
+  return match[1].trim();
+}

 export const verificationSetup: GraphNode<typeof MessagesState> = async (state) => {
-  //this is kinda doing two things, but having two nodes for it seems overkill
-
  if (state.proposedTriggerEvent == undefined) {
-    logger.warn("No trigger events in memory, parsing")
+    logger.warn("No trigger events in memory, parsing");

-    let genResponse = state.messages.at(-1)?.content.toString() ?? "";
-    const parsed = ProposedTriggerEventArray.parse(JSON.parse(genResponse));
+    const genResponse = state.messages.at(-1)?.content.toString() ?? "";

-    for (let i = 0; i < parsed.length; i++) {
-      const search = parsed[i].SearchQuery
-      // const data = await queryScraper(search);
-      // const output = await rankAndDisplayData(data, search);
+    let repaired: string;
+    try {
+      let extracted = extractJSON(genResponse)
+      repaired = jsonrepair(extracted);
+    } catch (repairErr: any) {
+      logger.error("Failed to repair JSON from LLM response.");
+      logger.error("Original LLM response:\n%s", genResponse);
+      throw new Error(`JSON repair failed: ${repairErr.message}`);
+    }

-      // parsed[i].context = output;
-      parsed[i].context = "NONE"
+    let parsed;
+    try {
+      const json = JSON.parse(repaired);
+
+      if (Array.isArray(json)) {
+        parsed = ProposedTriggerEventArray.parse(json);
+      } else {
+        // try grab first value
+        const firstValue = Object.values(json)[0];
+
+        if (Array.isArray(firstValue)) {
+          parsed = ProposedTriggerEventArray.parse(firstValue);
+        } else {
+          logger.error("No array found in JSON after parsing.");
+          logger.error("Repaired JSON:\n%s", repaired);
+          logger.error("Original LLM response:\n%s", genResponse);
+          throw new Error("No array found in JSON structure");
+        }
+      }
+    } catch (parseErr: any) {
+      logger.error("Failed to parse LLM response to JSON or validate array.");
+      logger.error("Repaired JSON:\n%s", repaired);
+      logger.error("Original LLM response:\n%s", genResponse);
+      throw new Error(`Parsing failed: ${parseErr.message}`);
    }

    return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 };
-  }
-  else {
-    logger.info("Trigger event index %s", state.proposedTriggerEventIndex+1)
+  } else {
+    logger.info("Trigger event index %s", state.proposedTriggerEventIndex + 1);

-    return { proposedTriggerEvent: state.proposedTriggerEvent, proposedTriggerEventIndex: state.proposedTriggerEventIndex+1 };
+    return { proposedTriggerEvent: state.proposedTriggerEvent, proposedTriggerEventIndex: state.proposedTriggerEventIndex + 1 };
  }
 };
@@ -17,6 +17,7 @@
    "@langchain/core": "^1.1.17",
    "@langchain/langgraph": "^1.1.2",
    "@langchain/langgraph-sdk": "^1.5.5",
+    "@langchain/ollama": "^1.2.6",
    "@langchain/openai": "^1.2.3",
    "axios": "^1.13.5",
    "compute-cosine-similarity": "^1.1.0",
@@ -24,6 +25,7 @@
    "dotenv": "^17.2.3",
    "exponential-backoff": "^3.1.3",
    "fs": "^0.0.1-security",
+    "jsonrepair": "^3.13.3",
    "langchain": "^1.2.14",
    "selenium-webdriver": "^4.40.0",
    "tldts": "^7.0.23",
@@ -16,4 +16,7 @@ Relevent examples are included in preceeding messages, use these as exact inspir
 The claim to normalize is:
 ###TITLE###

-Produce no other text other than the condensed claim.
+Produce no other text other than the condensed claim, surrounded by <norm></norm> tags
+
+For example: BREAKING: the sky is green!
+Becomes: <norm>The sky is green</norm>
@@ -1,9 +0,0 @@
-Could the following real-world event:
-###TECLAIM###
-
-Be a trigger for the following disinformation:
-###TITLE###
-
-Respond with "RELATION", followed by : followed by a confidence score (VERYHIGH, HIGH, MEDIUM, LOW, VERYLOW) followed by : followed by the reason. Use no other words, just return the score and reason in format.
-
-Ignore wether the event happened or not, purely consider the likiness of causation
@@ -3,9 +3,10 @@ Once the information has been created as below, a dataset can be created to feed

 There is a false disinformation claim circulating:
 ###NTITLE###
-Produce up-to 5 specific events that happened that have led to the spread of this disinformation.
+Produce up-to 5 specific "trigger events" that happened that could have led to the spread of this disinformation.

 Remember the time frame of the disinformation campaign: ###CDATE###
+Include no information or events that would not have been available at the time. 

 Produce no more text other than the json.

@@ -13,8 +14,29 @@ Include a concise but specific search query that can be looked up on a search en

 Include a url to a source for your trigger event (not a web search, a specific url from a reputuable source). Do not use OAI cite, include url as text in response.

-Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url".
+Include the date that the event happened ("March 2022" for example)
+
+Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url,Date".
+Return ONLY JSON, no extra text. Wrap it like this:
+<json>
+[
+    {
+        "Event": "Example"
+        ...
+    }
+]
+</json>

 Multiple tool invocations should be requested at once, if applicable.
+Use your abilities to look between the lines and produce some insightful analysis, thinking both short and long term.
+
+Events will be reordered as part of processing, each statement must stand alone

 The preceeding messages act as examples of previous responses to potentially ficitonal events and scores given.
+Analysis should only be completed for proposed events that would garner >0.7 points
+
+Since URLs change frequently, use tools to retrieve up-to-date information every time; provided examples or existing knowledge will be wrong or out of date.
+
+Remember to return just json enclosed by <json></json>
+
+Let's go through it step by step
@@ -1,8 +0,0 @@
-Do the search results cited below
-###TESEARCH###
-Support the idea that the following happened:
-###TECLAIM###
-
-Respond with "CONFIDENCE", followed by : followed by a confidence score (VERYHIGH, HIGH, MEDIUM, LOW, VERYLOW) followed by : followed by the reason. Use no other words, just return the score and reason in format.
-
-Dates can be off by a few days, that would still be valid
@@ -9,6 +9,7 @@ export const ProposedTriggerEvent = z.object({
  ReasoningWhyRelevant: z.string(),
  SearchQuery: z.string(),
  Url: z.url(),
+  Date: z.string(),
  context: z.string().optional(),
  score: z.number().optional()
 })
@@ -7,7 +7,7 @@ export async function evaluateWithEnsemble({
  answer: string;
  method: string
 }): Promise<{ validProb: number; invalidProb: number; }> {
-  const res = await axios.post("http://localhost:8000/evaluate", {
+    const res = await axios.post(process.env.RANKING_URL ?? "http://localhost:8000/evaluate", {
    answer,
    method
  }, {timeout: 0});
@@ -18,11 +18,15 @@ export async function evaluateWithEnsemble({
  return {validProb, invalidProb};
 }

-// let res = await evaluateWithRoberta({answer: "High-profile political downplaying of COVID-19 (examples: President Trump saying 'it will go away' in March–August 2020)"});
+// import dotenv from "dotenv";
+
+// dotenv.config();
+
+// let res = await evaluateWithEnsemble({method:"flan" ,answer: "High-profile political downplaying of COVID-19 (examples: President Trump saying 'it will go away' in March–August 2020)"});
 // console.log(res)

-// res = await evaluateWithRoberta({answer: "Multiple mirrored reuploads (2020–2023) put the clip on other channels with titles implying it was a genuine 1970s public information film."});
+// res = await evaluateWithEnsemble({method:"roberta" ,answer: "Multiple mirrored reuploads (2020–2023) put the clip on other channels with titles implying it was a genuine 1970s public information film."});
 // console.log(res)

-// res = await evaluateWithRoberta({answer: "The COVID-19 Pandemic"});
+// res = await evaluateWithEnsemble({method:"logreg" ,answer: "The COVID-19 Pandemic"});
 // console.log(res)
@@ -1,22 +0,0 @@
-import axios from "axios";
-
-export async function evaluateWithRagas({
-  question,
-  answer,
-  contexts,
-}: {
-  question: string;
-  answer: string;
-  contexts: string[];
-}) {
-  const res = await axios.post("http://localhost:8001/evaluate", {
-    question,
-    answer,
-    contexts,
-  });
-
-  return res.data;
-}
-
-// let res = await evaluateWithRagas({question: "Who was Bill Nye", answer: "Bill Nye was a Scientist", contexts: ["Bill nye was a Scientist"]});
-// console.log(res)
@@ -15,6 +15,8 @@ const CACHE_PATH = "../data/csv.cache.json";

 const JSONL_PATH = "../data/input.jsonl"

+const BM25_MIN_DOCS = 3;
+
 type EmbeddingCache = {
  rawtexts: string[];
  cleantexts: string[];
@@ -287,8 +289,20 @@ async function embedText(text: string): Promise<number[]> {
 }

 function buildBM25(texts: string[]) {
-  logger.info("Building BM25 index (%s docs)...", texts.length);
+  let paddedTexts = texts;

+  if (texts.length < BM25_MIN_DOCS) {
+    const needed = BM25_MIN_DOCS - texts.length;
+    logger.error(
+      "Corpus too small for BM25 (%s docs, need %s+), padding with %s dummy doc(s)",
+      texts.length,
+      BM25_MIN_DOCS,
+      needed
+    );
+    paddedTexts = [...texts, ...Array(needed).fill("placeholder dummy document")];
+  }
+
+  logger.info("Building BM25 index (%s docs)...", paddedTexts.length);
  const bm25 = bm25Factory();

  bm25.defineConfig({
@@ -302,7 +316,7 @@ function buildBM25(texts: string[]) {
    nlp.tokens.removeWords,
  ]);

-  texts.forEach((text, i) => {
+  paddedTexts.forEach((text, i) => {
    bm25.addDoc({ text }, i);
  });

@@ -1,32 +1,95 @@
 import { Builder, Browser } from "selenium-webdriver";
 import firefox from "selenium-webdriver/firefox";
+import { backOff } from "exponential-backoff";
+import { logger } from "../utils/logger";

-export async function extractWebpageContent(url: string) : Promise<string[]>{
+export async function extractWebpageContent(url: string): Promise<string[]> {
+  try {
+    const response = await backOff(async () => {
+      return await extractWebpageContentWorker(url);
+    }, {
+      numOfAttempts: 10,
+      startingDelay: 500,
+      timeMultiple: 2,
+      jitter: "full",
+      maxDelay: 50000,
+    });
+    return response;
+  } catch (err: any) {
+    logger.error(`Failed out of retry loop for URL "${url}", returning placeholder to pipeline`);
+    return ["API EXCEPTION"];
+  }
+}
+
+async function extractWebpageContentWorker(url: string): Promise<string[]> {
+  let driver;
+  try {
    const options = new firefox.Options();
    options.addArguments("--headless");
+    options.addArguments("--disable-gpu");
+    options.addArguments("--no-sandbox"); // Linux sandbox issues
+    options.addArguments("--disable-dev-shm-usage"); // /dev/shm issues
+    driver = await new Builder()
+      .forBrowser(Browser.FIREFOX)
+      .setFirefoxOptions(options)
+      .build();
+  } catch (err: any) {
+    const desc = `Failed to launch Firefox driver: ${err.message}`;
+    logger.error(desc);
+    throw new Error(desc);
+  }
+
+  try {
+    try {
+      await driver.get(url);
+    } catch (err: any) {
+      const desc = `Failed to navigate to URL "${url}": ${err.message}`;
+      logger.error(desc);
+      throw new Error(desc);
+    }

-    let driver = await new Builder().forBrowser(Browser.FIREFOX).setFirefoxOptions(options).build()
    try {
-        await driver.get(url)
      await driver.wait(async () => {
        return await driver.executeScript(
          "return document.readyState === 'complete'"
        );
      }, 5000);
+    } catch (err: any) {
+      logger.error(`Page load timed out for "${url}", attempting to read partial content: ${err.message}`);
+      // do not throw, attempt to read
+    }

-        const readableText = await driver.executeScript(
+    let readableText: string;
+    try {
+      readableText = await driver.executeScript(
        "return document.body.innerText;"
      ) as string;
+    } catch (err: any) {
+      const desc = `Failed to extract page text from "${url}": ${err.message}`;
+      logger.error(desc);
+      throw new Error(desc);
+    }

    const filteredLines = readableText
      .split(/\r?\n/)
      .map(line => line.trim())
      .filter(line => line.split(/\s+/).length > 1);

+    if (filteredLines.length === 0) {
+      const desc = `No content extracted from "${url}"`;
+      logger.error(desc);
+      throw new Error(desc);
+    }
+
    return filteredLines;
  } finally {
-        await driver.quit()
+    try {
+      await driver.quit();
+    } catch (err: any) {
+      logger.error(`Failed to quit Firefox driver cleanly: ${err.message}`);
+    }
  }
 }

-//console.log(await extractWebpageContent("https://www.bbc.co.uk/news/live/c74wd01egvyt"))
+// console.log(await extractWebpageContent("https://www.bbc.co.uk/news/live/c74wd01egvyt"))
+// console.log(await extractWebpageContent("https://badcertificate.int.jeynes.uk/"))
@@ -92,7 +92,7 @@ LABEL_TO_INT = {v: k for k, v in INT_TO_LABEL.items()}
 flan_tokenizer = AutoTokenizer.from_pretrained(FLAN_PATH)
 flan_model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_PATH)

-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+device = torch.device("cpu")
 flan_model.to(device)
 flan_model.eval()

@@ -9,6 +9,7 @@ datasets
 # ROBERTA
 scikit-learn
 transformers[torch]
+sentence_transformers

 # Utils
 numpy
@@ -17,7 +17,7 @@ const AGENT_NAME = process.env.AGENT ?? "agent";
 */
 const MODE = process.env.MODE ?? "claim";

-const MAX_CONCURRENCY = 5;
+const MAX_CONCURRENCY = 1;

 const client = new Client({ apiUrl: API_URL });

@@ -118,7 +118,7 @@ async function processRecord(record: any): Promise<ResultRecord> {
      input: buildAgentInput(record),
      streamMode: "values",
      config: {
-        recursion_limit: 50
+        recursion_limit: 100
      }
    });
Author	SHA1	Message	Date
William Jeynes	b37799b3d2	Improve response extraction	2026-04-02 21:02:26 +01:00
William Jeynes	10f2644408	Use a slightly smaller model. Reduce concurrency. Be more clear in the prompts	2026-04-02 20:10:57 +01:00
William Jeynes	7e586fe17d	Allow for configurable ranking server url. Delete old ragas call	2026-04-02 13:48:15 +01:00
William Jeynes	7e37a22058	Switch to actual instruction model. For debug, log entire object.	2026-04-02 13:18:02 +01:00
William Jeynes	2ed47980ef	Add better error handling to LLM output response	2026-03-31 19:26:56 +01:00
William Jeynes	01b04dd73e	use a model we know has tool calling capabilities	2026-03-31 18:26:55 +01:00
William Jeynes	593baf9b15	add extra options	2026-03-31 17:15:55 +01:00
William Jeynes	893829e599	Switch to CPU only, as to not confuse GPU	2026-03-31 16:09:41 +01:00
William Jeynes	36c30a427d	update deps. Install ollama for lang chain. Update model to deepseek	2026-03-31 16:08:28 +01:00
William Jeynes	b610e8c989	Add sentence transformers to requirements for ensemble service	2026-03-31 15:52:14 +01:00
William Jeynes	f8d4155b7c	Add more robust parsing of LLM JSON output	2026-03-27 11:09:59 +00:00
William Jeynes	5e374a8bd6	Fix errors seen during longer runs: selenium exceptions, insecure certificates, recursion limit exceeded, BM25 document corpus too small	2026-03-26 12:22:13 +00:00
William Jeynes	fbc688b8f9	add date to returned data	2026-03-25 22:37:14 +00:00