Update README, lock langchain CLI to specific version

Remove some very unused prompts
Add database link to README
2026-05-07 18:45:12 +01:00 · 2026-05-03 21:46:54 +01:00 · 2026-04-09 15:46:18 +01:00 · 2026-04-05 22:47:25 +01:00 · 2026-04-05 12:31:09 +01:00 · 2026-04-05 11:51:28 +01:00
18 changed files with 218 additions and 151 deletions
@@ -1,9 +1,22 @@
 # AI models for identifying trigger events in disinformation analysis
 Final Dissertation Submission Repository

-## Project Description
+## Abstract
 -- todo --

+[Project Presentation](https://jillweynes.github.io/LLMsForDisinformationPrediction-GraphVizBuilt/presentation)
+
+## Generated Dataset Link and Usage Experiments
+Generated Dataset Link: [https://huggingface.co/datasets/WillJeynes/LLMsForDisinformationAnalysis-Dataset](https://huggingface.co/datasets/WillJeynes/LLMsForDisinformationAnalysis-Dataset)
+
+Graph-Based Dataset Visualisation: [https://jillweynes.github.io/LLMsForDisinformationPrediction-GraphVizBuilt/](https://jillweynes.github.io/LLMsForDisinformationPrediction-GraphVizBuilt/)
+
+Usage Experiments (incl. graph visualisation) Source Code: [https://github.com/WillJeynes/LLMsForDisinformationPrediction](https://github.com/WillJeynes/LLMsForDisinformationPrediction)
+
+
+
+# This repository:
+
 ## Solution Diagram
 -- todo --

@@ -13,8 +26,6 @@ Final Dissertation Submission Repository
 ## Agent Refinement
 [See agent](/agent/)

-## Generated Database Link and Usage Experiments
-- todo --

 ## Repository Structure
 ```
@@ -1,3 +1,32 @@
 ## Refining the agent output

-TODO: Table and document experiments
+Experiments modifying the pipeline:
+
+| Pipeline variant | % Correct | Relative change vs. baseline |
+|------------------|----------:|---------:|
+| BASELINE         | 33        | 0        |
+| Improved Prompt  | 39.96     | 0.21     |
+| Add Examples     | 44.67     | 0.35     |
+| Date             | 45.51     | 0.38     |
+| Chain of Thought | 43.38     | 0.31     |
+| Self-Critique    | 44.36     | 0.34     |
+
+Experiments with different model types:
+| Model                         | % Correct | % Change |
+|-------------------------------|----------:|---------:|
+| gpt-5-mini                    | 45.51     |          |
+| gpt-5.4-mini                  | 32.4      |          |
+| gpt-5.4-nano                  | 23.28     |          |
+| gpt-4.1-mini                  | 27.85     |          |
+| gpt-4o-mini                   | 32.47     |          |
+| llama3.1:8b-instruct-q4_K_M   | ?         |          |
+| qwen3.5:9b                    | 0         |          |
+
+Percentage of valid URLs:
+| Model                         | Number    | Percent  |
+|-------------------------------|----------:|---------:|
+| gpt-5-mini                    | 22/405    | 5.43     |
+| gpt-5.4-mini                  | 29/278    | 10.43    |
+| gpt-5.4-nano                  | 6/210     | 2.86     |
+| gpt-4.1-mini                  | 15/269    | 5.58     |
+| gpt-4o-mini                   | 27/287    | 9.41     |
@@ -11,18 +11,13 @@ import { loopEndConditional } from "./conditionals/loop_end";
 import { sort } from "./nodes/sort";
 import { triggerEventSetup } from "./nodes/triggerEventSetup";
 import { createEnsembleNode } from "./nodes/ensembleNode";
-import { selfEvalSetup } from "./nodes/selfEvalSetup";

 const triggerEventToolNode = createToolNode(triggerEventToolsByName);
-const peToolNode = createToolNode(triggerEventToolsByName);

 const normalisationModel = createModelNode([], "normalization.txt");
 const triggerEventModel = createModelNode(triggerEventToolsByName, "trigger.txt");
-const evaluationModel = createModelNode([], "eval.txt");
-const peModel = createModelNode(triggerEventToolsByName, "posteval.txt");

-const triggerEventToolConditional = createToolConditional("triggerEventToolNode", selfEvalSetup.name);
-const peToolConditional = createToolConditional("peToolNode", verificationSetup.name);
+const triggerEventToolConditional = createToolConditional("triggerEventToolNode", verificationSetup.name);

 const roNode = createEnsembleNode("ROBERTA", "roberta");
 const flNode = createEnsembleNode("FLAN", "flan");
@@ -38,12 +33,6 @@ const agent = new StateGraph(MessagesState)
  .addNode("triggerEventToolNode", triggerEventToolNode)
  .addNode("triggerEventModel", triggerEventModel)

-  .addNode(selfEvalSetup.name, selfEvalSetup)
-  .addNode("evaluationModel", evaluationModel)
-  
-  .addNode("peToolNode", peToolNode)
-  .addNode("peModel", peModel)
-
  .addNode(verificationSetup.name, verificationSetup)

  .addNode("roNode", roNode)
@@ -60,16 +49,9 @@ const agent = new StateGraph(MessagesState)
  .addEdge(triggerEventSetup.name, "triggerEventModel")
  
  // @ts-expect-error
-  .addConditionalEdges("triggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", selfEvalSetup.name])
+  .addConditionalEdges("triggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name])
  .addEdge("triggerEventToolNode", "triggerEventModel")
  
-  .addEdge(selfEvalSetup.name, "evaluationModel")
-  .addEdge("evaluationModel", "peModel")
-
-  // @ts-expect-error
-  .addConditionalEdges("peModel", peToolConditional, ["peToolNode", verificationSetup.name])
-  .addEdge("peToolNode", "peModel")
-  
  .addEdge(verificationSetup.name, "roNode")
  .addEdge(verificationSetup.name, "flNode")
  .addEdge(verificationSetup.name, "lrNode")
@@ -1,21 +0,0 @@
-import { GraphNode } from "@langchain/langgraph";
-import { MessagesState, ProposedTriggerEventArray } from "../state";
-import { logger } from "../utils/logger";
-import { queryScraper } from "../tools/webSearch";
-import { rankAndDisplayData } from "../tools/triggerEventTools";
-
-export const selfEvalSetup: GraphNode<typeof MessagesState> = async (state) => {
-  let genResponse = state.messages.at(-1)?.content.toString() ?? "";
-  const parsed = ProposedTriggerEventArray.parse(JSON.parse(genResponse));
-
-  for (let i = 0; i < parsed.length; i++) {
-    const search = parsed[i].SearchQuery
-    const data = await queryScraper(search);
-    const output = await rankAndDisplayData(data, search);
-
-    parsed[i].context = output;
-  }
-
-  return { evalTriggerEvent: parsed };
-
-};
@@ -1,8 +1,7 @@
 import { GraphNode } from "@langchain/langgraph";
 import { MessagesState, ProposedTriggerEventArray } from "../state";
 import { logger } from "../utils/logger";
-import { queryScraper } from "../tools/webSearch";
-import { rankAndDisplayData } from "../tools/triggerEventTools";
+import { jsonrepair } from 'jsonrepair'

 export const verificationSetup: GraphNode<typeof MessagesState> = async (state) => {
  //this is kinda doing two things, but having two nodes for it seems overkill
@@ -11,8 +10,31 @@ export const verificationSetup: GraphNode<typeof MessagesState> = async (state)
    logger.warn("No trigger events in memory, parsing")

    let genResponse = state.messages.at(-1)?.content.toString() ?? "";
-    const parsed = ProposedTriggerEventArray.parse(JSON.parse(genResponse));

+    const repaired = jsonrepair(genResponse);
+
+    let parsed;
+
+    try {
+      const json = JSON.parse(repaired);
+
+      if (Array.isArray(json)) {
+        parsed = ProposedTriggerEventArray.parse(json);
+      } else {
+        // try grab first value
+        const firstValue = Object.values(json)[0];
+
+        if (Array.isArray(firstValue)) {
+          parsed = ProposedTriggerEventArray.parse(firstValue);
+        } else {
+          throw new Error("No array found in JSON");
+        }
+      }
+    } catch (err: any) {
+      logger.error(`Failed to parse LLM response: ${err.message}`);
+      throw new Error(`Failed to parse LLM response: ${err}`);
+    }
+    
    return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 };
  }
  else {
@@ -20,6 +20,7 @@
        "dotenv": "^17.2.3",
        "exponential-backoff": "^3.1.3",
        "fs": "^0.0.1-security",
+        "jsonrepair": "^3.13.3",
        "langchain": "^1.2.14",
        "selenium-webdriver": "^4.40.0",
        "tldts": "^7.0.23",
@@ -2075,6 +2076,15 @@
      "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==",
      "license": "ISC"
    },
+    "node_modules/jsonrepair": {
+      "version": "3.13.3",
+      "resolved": "https://registry.npmjs.org/jsonrepair/-/jsonrepair-3.13.3.tgz",
+      "integrity": "sha512-BTznj0owIt2CBAH/LTo7+1I5pMvl1e1033LRl/HUowlZmJOIhzC0zbX5bxMngLkfT4WnzPP26QnW5wMr2g9tsQ==",
+      "license": "ISC",
+      "bin": {
+        "jsonrepair": "bin/cli.js"
+      }
+    },
    "node_modules/jszip": {
      "version": "3.10.1",
      "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz",
@@ -24,6 +24,7 @@
    "dotenv": "^17.2.3",
    "exponential-backoff": "^3.1.3",
    "fs": "^0.0.1-security",
+    "jsonrepair": "^3.13.3",
    "langchain": "^1.2.14",
    "selenium-webdriver": "^4.40.0",
    "tldts": "^7.0.23",
@@ -1,30 +0,0 @@
-You are an impartial and meticulous evaluator assessing LLM’s response based on key quality dimensions
-of honesty and usefulness. Your goal is to provide structured feedback that can be used to improve the
-response.
-Evaluation task: please follow these steps carefully:
-1. Analyze the response based on the three dimensions below.
-2. Provide justifications first: write a brief explanation justifying your assessment for each dimension.
-3. Assign scores after justification: assign a score from 1 (poor) to 10 (excellent) for each dimension
-based on your justification.
-4. Synthesize: provide a brief overall impression and the single most important suggestion for improve-
-ment, keeping in mind that explanation/honesty is the top priority, then followed by guidance.
-Critique dimensions (evaluate in this order):
-(1) Speficicity and usefullness: Can the proposed event be used to create a dataset of concrete events mapped to later
-disinformation.
-(2) Existance: Using the context provided, can the user be certain that the proposed trigger event actually happened
-(3) Causality: Is there a possible link from the proposed trigger event to the disinformaiton at hand
-Overall impression & key improvement suggestion: Briefly summarize the overall quality and state the
-most critical change needed to improve the response.
-
-Disinformation query: 
-###NTITLE###
-Disinformation date:
-###CDATE###
-
-LLM’s response to evaluate:
-###LM###
-
-Provided context:
-###VESEARCHES###
-
-Let's think it through step by step
@@ -15,10 +15,6 @@ export async function hydratePrompt(path: string, state: any) : Promise<string>
        raw = raw.replace("###LM###", state.messages.at(-1).content);
    }

-    if (raw.indexOf("###L2M###") != -1) {
-        raw = raw.replace("###L2M###", state.messages.at(-2).content);
-    }
-
    if (raw.indexOf("###NTITLE###") != -1) {
        raw = raw.replace("###NTITLE###", state.normalizedClaim);
    }
@@ -37,12 +33,5 @@ export async function hydratePrompt(path: string, state: any) : Promise<string>
        raw = raw.replace("###TESEARCH###", output)
    }

-    if (raw.indexOf("###VESEARCHES###") != -1) {
-        const output = state.evalTriggerEvent
-            .map(e => e.context)
-            .join("\n")
-        raw = raw.replace("###VESEARCHES###", output)
-    }
-
    return raw;
 }
@@ -1,40 +0,0 @@
-You are an expert editor tasked with making targeted improvements to an existing LLM’s response based
-on a specific critique with the primary goal of enhancing its score according to evaluation standards while
-preserving its strengths.
-Your revision task: generate a revised version of the existing response. Your goal is not to rewrite it
-completely, but to make precise edits only to address the specific weaknesses highlighted in the critique.
-Instructions for editing:
- Identify specific flaws: carefully read the critique and pinpoint the exact issues raised (e.g., unclear
-explanation, vagueness, inappropriate responses, the key suggestion).
- Perform minimal targeted edits: modify only the necessary sentences or paragraphs within the existing
-response to directly fix these identified flaws.
- Strongly preserve strengths: crucially keep all other parts of the existing response intact. Do not
-rephrase, restructure, or remove sections that were not criticized or likely contributed positively to its
-initial score.
- Ensure coherence: verify that your targeted edits integrate smoothly and do not introduce contradictions
-or awkward phrasing.
-Output requirements:
- It should feel like a slightly polished or corrected version of the existing response, not a fundamentally
-different answer.
- Do not mention the critique, scores, or the editing process. The output should be clean json that passes validation checks
-
-Again, use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url,Date".
-Use tools available to you if further information is required
-
-Add no new events, only improve the existing items
-
-Disinformation query: 
-###NTITLE###
-Disinformation date:
-###CDATE###
-
-LLM’s response to improve:
-###L2M###
-
-Citique:
-###LM###
-
-This contains specific feedback, justifications, scores from 1 to 10, and potentially a key improvement
-suggestion. Focus on the justifications for low scores and the key suggestion.
-
-Let's think it through step by step
@@ -1,9 +0,0 @@
-Could the following real-world event:
-###TECLAIM###
-
-Be a trigger for the following disinformation:
-###TITLE###
-
-Respond with "RELATION", followed by : followed by a confidence score (VERYHIGH, HIGH, MEDIUM, LOW, VERYLOW) followed by : followed by the reason. Use no other words, just return the score and reason in format.
-
-Ignore wether the event happened or not, purely consider the likiness of causation
@@ -1,8 +0,0 @@
-Do the search results cited below
-###TESEARCH###
-Support the idea that the following happened:
-###TECLAIM###
-
-Respond with "CONFIDENCE", followed by : followed by a confidence score (VERYHIGH, HIGH, MEDIUM, LOW, VERYLOW) followed by : followed by the reason. Use no other words, just return the score and reason in format.
-
-Dates can be off by a few days, that would still be valid
@@ -21,7 +21,6 @@ export const MessagesState = new StateSchema({
  date: z.string(),
  messages: MessagesValue,
  proposedTriggerEvent: ProposedTriggerEventArray,
-  evalTriggerEvent: ProposedTriggerEventArray,
  proposedTriggerEventIndex: z.int(),
  normalizedClaim: z.string(),
 });
@@ -5,7 +5,7 @@ set -e
 run_agent () {
    echo "Starting LangGraph agent..."
    cd agent
-    npx @langchain/langgraph-cli dev
+    npx @langchain/langgraph-cli@1.1.17 dev
 }

 run_ensemble_service () {
@@ -9,6 +9,7 @@ datasets
 # ROBERTA
 scikit-learn
 transformers[torch]
+sentence_transformers

 # Utils
 numpy
@@ -19,6 +19,9 @@ const MODE = process.env.MODE ?? "claim";

 const MAX_CONCURRENCY = 5;

+const OFFSET = parseInt(process.env.OFFSET ?? "0", 10);
+const LIMIT = process.env.LIMIT ? parseInt(process.env.LIMIT, 10) : null;
+
 const client = new Client({ apiUrl: API_URL });


@@ -164,10 +167,19 @@ async function processRecord(record: any): Promise<ResultRecord> {
 async function main() {
  console.log("Reading input file...");

-  const records = await loadInputs();
+  const allRecords = await loadInputs();

-  console.log(`Loaded ${records.length} records`);
+  console.log(`Loaded ${allRecords.length} records`);

+  const records = allRecords.slice(
+    OFFSET,
+    LIMIT !== null ? OFFSET + LIMIT : undefined
+  );
+
+  console.log(
+    `Processing ${records.length} records (offset=${OFFSET}, limit=${LIMIT ?? "∞"})`
+  );
+  
  fs.writeFileSync(OUTPUT_FILE, "", { flag: "a" });

  const limit = pLimit(MAX_CONCURRENCY);
@@ -0,0 +1,119 @@
+import json
+import argparse
+from urllib.parse import urlparse
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.common.exceptions import WebDriverException, TimeoutException, StaleElementReferenceException
+from tqdm import tqdm
+
+def init_driver():
+    options = Options()
+    options.headless = True
+    options.add_argument("--disable-gpu")
+    options.add_argument("--no-sandbox")
+    options.add_argument("--headless")
+    options.add_argument("--disable-blink-features=AutomationControlled")
+    options.add_argument("--window-size=1920,1080")
+    prefs = {
+        "profile.managed_default_content_settings.images": 2,  # block images
+        "profile.default_content_setting_values.stylesheets": 2,  # block CSS
+        "profile.managed_default_content_settings.cookies": 2,  # optional
+    }
+    options.add_experimental_option("prefs", prefs)
+
+    driver = webdriver.Chrome(options=options)
+    driver.set_page_load_timeout(30)
+    return driver
+
+def is_root_url(url):
+    parsed = urlparse(url)
+    return parsed.path in ("", "/")
+
+def is_404_page(driver):
+    """Safely check for 404, handling stale elements."""
+    try:
+        title = driver.title.lower()
+        body_text = driver.find_element("tag name", "body").text.lower()
+        return "404" in title or "404" in body_text
+    except StaleElementReferenceException:
+        return False
+    except Exception:
+        return False
+
+def check_url_selenium(url):
+    driver = None
+    try:
+        driver = init_driver()
+        driver.get(url)
+        # 404 check
+        if is_404_page(driver):
+            return False, "404 page detected"
+        # Root URL after redirects
+        final_url = driver.current_url
+        if is_root_url(final_url):
+            return False, f"Redirected to root URL ({final_url})"
+        return True, None
+    except (WebDriverException, TimeoutException) as e:
+        return False, str(e)
+    finally:
+        if driver:
+            driver.quit()
+
+def process_event(event):
+    """Process an event only if score > 0.4."""
+    score = event.get("score", 0)
+    if score <= 0.4:
+        return None, False, "Score too low"
+    url = event.get("Url")
+    if not url:
+        return None, False, "No URL"
+    is_valid, error_msg = check_url_selenium(url)
+    event["url_valid"] = is_valid
+    return url, is_valid, error_msg
+
+def process_jsonl_file(file_path, max_workers=4):
+    invalid_urls = []
+    valid_urls = 0
+
+    # Gather events with score > 0.4
+    urls_to_check = []
+    with open(file_path, "r", encoding="utf-8") as f:
+        for line in f:
+            line_data = json.loads(line)
+            if line_data.get("status") != "success":
+                continue
+            for event in line_data.get("events", []):
+                if event.get("score", 0) > 0.4:
+                    urls_to_check.append(event)
+
+    total_urls = len(urls_to_check)
+
+    # ThreadPoolExecutor with tqdm progress bar
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        future_to_event = {executor.submit(process_event, e): e for e in urls_to_check}
+        for future in tqdm(as_completed(future_to_event), total=total_urls, desc="Checking URLs"):
+            url, is_valid, error_msg = future.result()
+            if not is_valid and url:
+                invalid_urls.append((url, error_msg))
+            else:
+                valid_urls += 1
+
+    # Summary
+    if invalid_urls:
+        print("\nList of invalid URLs and reasons:")
+        for url, err in invalid_urls:
+            print(f"{url} --> {err}")
+    print("\n=== URL Validation Summary ===")
+    print(f"Total URLs processed: {total_urls}")
+    print(f"Valid URLs (loaded successfully): {valid_urls}")
+    print(f"Invalid URLs: {len(invalid_urls)}")
+    
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Validate URLs in JSONL file events using Selenium")
+    parser.add_argument("file_path", type=str, help="Path to the JSONL file")
+    parser.add_argument("--workers", type=int, default=4, help="Number of parallel Selenium workers")
+    args = parser.parse_args()
+
+    process_jsonl_file(args.file_path, max_workers=args.workers)
@@ -27,7 +27,7 @@ DEFAULT_PARAMS = [
    ("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
 ]

-NUM_RANDOM_CLAIMS = 200
+NUM_RANDOM_CLAIMS = 2000

 INPUT_FILE = "../../data/input.jsonl"
 OUTPUT_FILE = "../../data/claims.json"
Author	SHA1	Message	Date
William Jeynes	4e0bab9897	Update README, lock langchain CLI to specific version	2026-05-07 18:45:12 +01:00
William Jeynes	c4dac3f515	Remove some very unused prompts	2026-05-03 21:46:54 +01:00
William Jeynes	2252a42466	Add database link to README	2026-04-09 15:46:18 +01:00
William Jeynes	75ca1032a6	Add offset and limit in preparation for the large dataset	2026-04-05 22:47:25 +01:00
William Jeynes	00d129bd28	add % valid URLs for different model	2026-04-05 12:31:09 +01:00
William Jeynes	cf923d6e87	Add new accuracy results	2026-04-05 11:51:28 +01:00
William Jeynes	f821e9643d	Add url validity metrics	2026-04-04 20:02:25 +01:00
William Jeynes	43ecd04135	add multithreading	2026-04-04 19:42:02 +01:00
William Jeynes	8c0921057b	start on work to calculate % of valid URLs	2026-04-04 18:52:47 +01:00
William Jeynes	b610e8c989	Add sentence transformers to requirements for ensemble service	2026-03-31 15:52:14 +01:00
William Jeynes	f8d4155b7c	Add more robust parsing of LLM JSON output	2026-03-27 11:09:59 +00:00