Add self improvement pattern with two new prompt nodes

Fix errors seen during longer runs: selenium exceptions, insecure certificates, recursion limit exceeded, BM25 document corpus too small
add date to returned data
2026-03-26 14:44:48 +00:00 · 2026-03-26 12:22:13 +00:00 · 2026-03-25 22:37:14 +00:00
11 changed files with 240 additions and 44 deletions
@@ -11,13 +11,18 @@ import { loopEndConditional } from "./conditionals/loop_end";
 import { sort } from "./nodes/sort";
 import { triggerEventSetup } from "./nodes/triggerEventSetup";
 import { createEnsembleNode } from "./nodes/ensembleNode";
+import { selfEvalSetup } from "./nodes/selfEvalSetup";

 const triggerEventToolNode = createToolNode(triggerEventToolsByName);
+const peToolNode = createToolNode(triggerEventToolsByName);

 const normalisationModel = createModelNode([], "normalization.txt");
 const triggerEventModel = createModelNode(triggerEventToolsByName, "trigger.txt");
+const evaluationModel = createModelNode([], "eval.txt");
+const peModel = createModelNode(triggerEventToolsByName, "posteval.txt");

-const triggerEventToolConditional = createToolConditional("triggerEventToolNode", verificationSetup.name);
+const triggerEventToolConditional = createToolConditional("triggerEventToolNode", selfEvalSetup.name);
+const peToolConditional = createToolConditional("peToolNode", verificationSetup.name);

 const roNode = createEnsembleNode("ROBERTA", "roberta");
 const flNode = createEnsembleNode("FLAN", "flan");
@@ -33,6 +38,12 @@ const agent = new StateGraph(MessagesState)
  .addNode("triggerEventToolNode", triggerEventToolNode)
  .addNode("triggerEventModel", triggerEventModel)

+  .addNode(selfEvalSetup.name, selfEvalSetup)
+  .addNode("evaluationModel", evaluationModel)
+  
+  .addNode("peToolNode", peToolNode)
+  .addNode("peModel", peModel)
+
  .addNode(verificationSetup.name, verificationSetup)

  .addNode("roNode", roNode)
@@ -49,9 +60,16 @@ const agent = new StateGraph(MessagesState)
  .addEdge(triggerEventSetup.name, "triggerEventModel")
  
  // @ts-expect-error
-  .addConditionalEdges("triggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name])
+  .addConditionalEdges("triggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", selfEvalSetup.name])
  .addEdge("triggerEventToolNode", "triggerEventModel")
  
+  .addEdge(selfEvalSetup.name, "evaluationModel")
+  .addEdge("evaluationModel", "peModel")
+
+  // @ts-expect-error
+  .addConditionalEdges("peModel", peToolConditional, ["peToolNode", verificationSetup.name])
+  .addEdge("peToolNode", "peModel")
+  
  .addEdge(verificationSetup.name, "roNode")
  .addEdge(verificationSetup.name, "flNode")
  .addEdge(verificationSetup.name, "lrNode")
@@ -0,0 +1,21 @@
+import { GraphNode } from "@langchain/langgraph";
+import { MessagesState, ProposedTriggerEventArray } from "../state";
+import { logger } from "../utils/logger";
+import { queryScraper } from "../tools/webSearch";
+import { rankAndDisplayData } from "../tools/triggerEventTools";
+
+export const selfEvalSetup: GraphNode<typeof MessagesState> = async (state) => {
+  let genResponse = state.messages.at(-1)?.content.toString() ?? "";
+  const parsed = ProposedTriggerEventArray.parse(JSON.parse(genResponse));
+
+  for (let i = 0; i < parsed.length; i++) {
+    const search = parsed[i].SearchQuery
+    const data = await queryScraper(search);
+    const output = await rankAndDisplayData(data, search);
+
+    parsed[i].context = output;
+  }
+
+  return { evalTriggerEvent: parsed };
+
+};
@@ -13,15 +13,6 @@ export const verificationSetup: GraphNode<typeof MessagesState> = async (state)
    let genResponse = state.messages.at(-1)?.content.toString() ?? "";
    const parsed = ProposedTriggerEventArray.parse(JSON.parse(genResponse));

-    for (let i = 0; i < parsed.length; i++) {
-      const search = parsed[i].SearchQuery
-      // const data = await queryScraper(search);
-      // const output = await rankAndDisplayData(data, search);
-
-      // parsed[i].context = output;
-      parsed[i].context = "NONE"
-    }
-    
    return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 };
  }
  else {
@@ -0,0 +1,30 @@
+You are an impartial and meticulous evaluator assessing an LLM’s response based on key quality dimensions
+of honesty and usefulness. Your goal is to provide structured feedback that can be used to improve the
+response.
+Evaluation task: please follow these steps carefully:
+1. Analyze the response based on the three dimensions below.
+2. Provide justifications first: write a brief explanation justifying your assessment for each dimension.
+3. Assign scores after justification: assign a score from 1 (poor) to 10 (excellent) for each dimension
+based on your justification.
+4. Synthesize: provide a brief overall impression and the single most important suggestion for
+improvement, keeping in mind that explanation/honesty is the top priority, then followed by guidance.
+Critique dimensions (evaluate in this order):
+(1) Specificity and usefulness: Can the proposed event be used to create a dataset of concrete events mapped to later
+disinformation?
+(2) Existence: Using the context provided, can the user be certain that the proposed trigger event actually happened?
+(3) Causality: Is there a possible link from the proposed trigger event to the disinformation at hand?
+Overall impression & key improvement suggestion: Briefly summarize the overall quality and state the
+most critical change needed to improve the response.
+
+Disinformation query: 
+###NTITLE###
+Disinformation date:
+###CDATE###
+
+LLM’s response to evaluate:
+###LM###
+
+Provided context:
+###VESEARCHES###
+
+Let's think it through step by step
@@ -15,6 +15,10 @@ export async function hydratePrompt(path: string, state: any) : Promise<string>
        raw = raw.replace("###LM###", state.messages.at(-1).content);
    }

+    if (raw.indexOf("###L2M###") != -1) {
+        raw = raw.replace("###L2M###", state.messages.at(-2).content);
+    }
+
    if (raw.indexOf("###NTITLE###") != -1) {
        raw = raw.replace("###NTITLE###", state.normalizedClaim);
    }
@@ -33,5 +37,12 @@ export async function hydratePrompt(path: string, state: any) : Promise<string>
        raw = raw.replace("###TESEARCH###", output)
    }

+    if (raw.indexOf("###VESEARCHES###") != -1) {
+        const output = state.evalTriggerEvent
+            .map(e => e.context)
+            .join("\n")
+        raw = raw.replace("###VESEARCHES###", output)
+    }
+
    return raw;
 }
@@ -0,0 +1,40 @@
+You are an expert editor tasked with making targeted improvements to an existing LLM’s response based
+on a specific critique with the primary goal of enhancing its score according to evaluation standards while
+preserving its strengths.
+Your revision task: generate a revised version of the existing response. Your goal is not to rewrite it
+completely, but to make precise edits only to address the specific weaknesses highlighted in the critique.
+Instructions for editing:
+- Identify specific flaws: carefully read the critique and pinpoint the exact issues raised (e.g., unclear
+explanation, vagueness, inappropriate responses, the key suggestion).
+- Perform minimal targeted edits: modify only the necessary sentences or paragraphs within the existing
+response to directly fix these identified flaws.
+- Strongly preserve strengths: crucially keep all other parts of the existing response intact. Do not
+rephrase, restructure, or remove sections that were not criticized or likely contributed positively to its
+initial score.
+- Ensure coherence: verify that your targeted edits integrate smoothly and do not introduce contradictions
+or awkward phrasing.
+Output requirements:
+- It should feel like a slightly polished or corrected version of the existing response, not a fundamentally
+different answer.
+- Do not mention the critique, scores, or the editing process. The output should be clean json that passes validation checks
+
+Again, use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url,Date".
+Use tools available to you if further information is required
+
+Add no new events, only improve the existing items
+
+Disinformation query: 
+###NTITLE###
+Disinformation date:
+###CDATE###
+
+LLM’s response to improve:
+###L2M###
+
+Critique:
+###LM###
+
+This contains specific feedback, justifications, scores from 1 to 10, and potentially a key improvement
+suggestion. Focus on the justifications for low scores and the key suggestion.
+
+Let's think it through step by step
@@ -3,9 +3,10 @@ Once the information has been created as below, a dataset can be created to feed

 There is a false disinformation claim circulating:
 ###NTITLE###
-Produce up-to 5 specific events that happened that have led to the spread of this disinformation.
+Produce up-to 5 specific "trigger events" that happened that could have led to the spread of this disinformation.

 Remember the time frame of the disinformation campaign: ###CDATE###
+Include no information or events that would not have been available at the time. 

 Produce no more text other than the json.

@@ -13,8 +14,16 @@ Include a concise but specific search query that can be looked up on a search en

 Include a URL to a source for your trigger event (not a web search, a specific URL from a reputable source). Do not use OAI cite, include the URL as text in the response.

-Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url".
+Include the date that the event happened ("March 2022" for example).
+
+Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url,Date".

 Multiple tool invocations should be requested at once, if applicable.
+Use your abilities to look between the lines and produce some insightful analysis, thinking both short and long term.
+
+Events will be reordered as part of processing, each statement must stand alone

 The preceding messages act as examples of previous responses to potentially fictional events and scores given.
+Analysis should only be completed for proposed events that would garner >0.7 points.
+
+Let's go through it step by step
@@ -9,6 +9,7 @@ export const ProposedTriggerEvent = z.object({
  ReasoningWhyRelevant: z.string(),
  SearchQuery: z.string(),
  Url: z.url(),
+  Date: z.string(),
  context: z.string().optional(),
  score: z.number().optional()
 })
@@ -20,6 +21,7 @@ export const MessagesState = new StateSchema({
  date: z.string(),
  messages: MessagesValue,
  proposedTriggerEvent: ProposedTriggerEventArray,
+  evalTriggerEvent: ProposedTriggerEventArray,
  proposedTriggerEventIndex: z.int(),
  normalizedClaim: z.string(),
 });
@@ -15,6 +15,8 @@ const CACHE_PATH = "../data/csv.cache.json";

 const JSONL_PATH = "../data/input.jsonl"

+const BM25_MIN_DOCS = 3;
+
 type EmbeddingCache = {
  rawtexts: string[];
  cleantexts: string[];
@@ -287,8 +289,20 @@ async function embedText(text: string): Promise<number[]> {
 }

 function buildBM25(texts: string[]) {
-  logger.info("Building BM25 index (%s docs)...", texts.length);
+  let paddedTexts = texts;

+  if (texts.length < BM25_MIN_DOCS) {
+    const needed = BM25_MIN_DOCS - texts.length;
+    logger.error(
+      "Corpus too small for BM25 (%s docs, need %s+), padding with %s dummy doc(s)",
+      texts.length,
+      BM25_MIN_DOCS,
+      needed
+    );
+    paddedTexts = [...texts, ...Array(needed).fill("placeholder dummy document")];
+  }
+
+  logger.info("Building BM25 index (%s docs)...", paddedTexts.length);
  const bm25 = bm25Factory();

  bm25.defineConfig({
@@ -302,7 +316,7 @@ function buildBM25(texts: string[]) {
    nlp.tokens.removeWords,
  ]);

-  texts.forEach((text, i) => {
+  paddedTexts.forEach((text, i) => {
    bm25.addDoc({ text }, i);
  });

@@ -1,32 +1,92 @@
 import { Builder, Browser } from "selenium-webdriver";
 import firefox from "selenium-webdriver/firefox";
+import { backOff } from "exponential-backoff";
+import { logger } from "../utils/logger";

 export async function extractWebpageContent(url: string): Promise<string[]> {
+  try {
+    const response = await backOff(async () => {
+      return await extractWebpageContentWorker(url);
+    }, {
+      numOfAttempts: 10,
+      startingDelay: 500,
+      timeMultiple: 2,
+      jitter: "full",
+      maxDelay: 50000,
+    });
+    return response;
+  } catch (err: any) {
+    logger.error(`Failed out of retry loop for URL "${url}", returning placeholder to pipeline`);
+    return ["API EXCEPTION"];
+  }
+}
+
+async function extractWebpageContentWorker(url: string): Promise<string[]> {
+  let driver;
+  try {
    const options = new firefox.Options();
    options.addArguments("--headless");
+    driver = await new Builder()
+      .forBrowser(Browser.FIREFOX)
+      .setFirefoxOptions(options)
+      .build();
+  } catch (err: any) {
+    const desc = `Failed to launch Firefox driver: ${err.message}`;
+    logger.error(desc);
+    throw new Error(desc);
+  }
+
+  try {
+    try {
+      await driver.get(url);
+    } catch (err: any) {
+      const desc = `Failed to navigate to URL "${url}": ${err.message}`;
+      logger.error(desc);
+      throw new Error(desc);
+    }

-    let driver = await new Builder().forBrowser(Browser.FIREFOX).setFirefoxOptions(options).build()
    try {
-        await driver.get(url)
      await driver.wait(async () => {
        return await driver.executeScript(
          "return document.readyState === 'complete'"
        );
      }, 5000);
+    } catch (err: any) {
+      logger.error(`Page load timed out for "${url}", attempting to read partial content: ${err.message}`);
+      // do not throw, attempt to read
+    }

-        const readableText = await driver.executeScript(
+    let readableText: string;
+    try {
+      readableText = await driver.executeScript(
        "return document.body.innerText;"
      ) as string;
+    } catch (err: any) {
+      const desc = `Failed to extract page text from "${url}": ${err.message}`;
+      logger.error(desc);
+      throw new Error(desc);
+    }

    const filteredLines = readableText
      .split(/\r?\n/)
      .map(line => line.trim())
      .filter(line => line.split(/\s+/).length > 1);

+    if (filteredLines.length === 0) {
+      const desc = `No content extracted from "${url}"`;
+      logger.error(desc);
+      throw new Error(desc);
+    }
+
    return filteredLines;
  } finally {
-        await driver.quit()
+    try {
+      await driver.quit();
+    } catch (err: any) {
+      logger.error(`Failed to quit Firefox driver cleanly: ${err.message}`);
+    }
  }
 }

 // console.log(await extractWebpageContent("https://www.bbc.co.uk/news/live/c74wd01egvyt"))
+// console.log(await extractWebpageContent("https://badcertificate.int.jeynes.uk/"))
@@ -118,7 +118,7 @@ async function processRecord(record: any): Promise<ResultRecord> {
      input: buildAgentInput(record),
      streamMode: "values",
      config: {
-        recursion_limit: 50
+        recursion_limit: 100
      }
    });
Author	SHA1	Message	Date
William Jeynes	a80d433fb6	Add self improvement pattern with two new prompt nodes	2026-03-26 14:44:48 +00:00
William Jeynes	5e374a8bd6	Fix errors seen during longer runs: selenium exceptions, insecure certificates, recursion limit exceeded, BM25 document corpus too small	2026-03-26 12:22:13 +00:00
William Jeynes	fbc688b8f9	add date to returned data	2026-03-25 22:37:14 +00:00