4 Commits

Author SHA1 Message Date
William Jeynes 38ca7a3d34 I can't affort the full model lol. Use jsonrepair module to fix agent malformed JSON instead. 2026-03-26 15:37:14 +00:00
William Jeynes 38b6fb6a0e Use an even better model 2026-03-26 15:14:43 +00:00
William Jeynes c7cccb87c3 Update to 5.4 mini 2026-03-26 12:44:01 +00:00
William Jeynes fd0674e96a Add a chain of thought to the main prompt 2026-03-26 12:33:43 +00:00
13 changed files with 38 additions and 200 deletions
+3 -14
View File
@@ -1,22 +1,9 @@
# AI models for identifying trigger events in disinformation analysis
Final Dissertation Submission Repository
## Abstract
## Project Description
-- todo --
[Project Presentation](https://jillweynes.github.io/LLMsForDisinformationPrediction-GraphVizBuilt/presentation)
## Generated Database Link and Usage Experiments
Generated Dataset Link: [https://huggingface.co/datasets/WillJeynes/LLMsForDisinformationAnalysis-Dataset](https://huggingface.co/datasets/WillJeynes/LLMsForDisinformationAnalysis-Dataset)
Graph-Based Dataset Visualisation: [https://jillweynes.github.io/LLMsForDisinformationPrediction-GraphVizBuilt/](https://jillweynes.github.io/LLMsForDisinformationPrediction-GraphVizBuilt/)
Usage Experiments (incl graph visualisation) Source Code: [https://github.com/WillJeynes/LLMsForDisinformationPrediction](https://github.com/WillJeynes/LLMsForDisinformationPrediction)
# This repository:
## Solution Diagram
-- todo --
@@ -26,6 +13,8 @@ Usage Experiments (incl graph visualisation) Source Code: [https://github.com/Wi
## Agent Refinement
[See agent](/agent/)
## Generated Database Link and Usage Experiments
-- todo --
## Repository Structure
```
+1 -30
View File
@@ -1,32 +1,3 @@
## Refining the agent output
Experiments modifying pipeline
| Model | % Correct | % Change |
|------------------|----------:|---------:|
| BASELINE | 33 | 0 |
| Improv Prompt | 39.96 | 0.21 |
| Add Examples | 44.67 | 0.35 |
| Date | 45.51 | 0.38 |
| Chain of Thought | 43.38 | 0.31 |
| Self-Critique | 44.36 | 0.34 |
Experiments with different model types:
| Model | % Correct | % Change |
|-------------------------------|----------:|---------:|
| gpt-5-mini | 45.51 | |
| gpt-5.4-mini | 32.4 | |
| gpt-5.4-nano | 23.28 | |
| gpt-4.1-mini | 27.85 | |
| gpt-4o-mini | 32.47 | |
| llama3.1:8b-instruct-q4_K_M | ? | |
| qwen3.5:9b | 0 | |
%age valid URLS
| Model | Number | % Age |
|-------------------------------|----------:|---------:|
| gpt-5-mini | 22/405 | 5.43 |
| gpt-5.4-mini | 29/278 | 10.43 |
| gpt-5.4-nano | 6/210 | 2.85 |
| gpt-4.1-mini | 15/269 | 5.57 |
| gpt-4o-mini | 27/287 | 9.407 |
TODO: Table and document experiments
+1 -1
View File
@@ -9,7 +9,7 @@ export function createModelNode(tools: any, promptPath: string): GraphNode<typeo
const sysPrompt = await hydratePrompt(promptPath, state);
const model = new ChatOpenAI({
model: "gpt-5-mini"
model: "gpt-5.4-mini"
});
const modelWithTools = model.bindTools(Object.values(tools));
+7 -18
View File
@@ -13,26 +13,15 @@ export const verificationSetup: GraphNode<typeof MessagesState> = async (state)
const repaired = jsonrepair(genResponse);
let parsed;
const parsed = ProposedTriggerEventArray.parse(JSON.parse(repaired));
try {
const json = JSON.parse(repaired);
for (let i = 0; i < parsed.length; i++) {
const search = parsed[i].SearchQuery
// const data = await queryScraper(search);
// const output = await rankAndDisplayData(data, search);
if (Array.isArray(json)) {
parsed = ProposedTriggerEventArray.parse(json);
} else {
// try grab first value
const firstValue = Object.values(json)[0];
if (Array.isArray(firstValue)) {
parsed = ProposedTriggerEventArray.parse(firstValue);
} else {
throw new Error("No array found in JSON");
}
}
} catch (err: any) {
logger.error(`Failed to parse LLM response: ${err.message}`);
throw new Error(`Failed to parse LLM response: ${err}`);
// parsed[i].context = output;
parsed[i].context = "NONE"
}
return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 };
+9
View File
@@ -0,0 +1,9 @@
Could the following real-world event:
###TECLAIM###
Be a trigger for the following disinformation:
###TITLE###
Respond with "RELATION", followed by : followed by a confidence score (VERYHIGH, HIGH, MEDIUM, LOW, VERYLOW) followed by : followed by the reason. Use no other words, just return the score and reason in format.
Ignore wether the event happened or not, purely consider the likiness of causation
+4
View File
@@ -26,4 +26,8 @@ Events will be reordered as part of processing, each statement must stand alone
The preceeding messages act as examples of previous responses to potentially ficitonal events and scores given.
Analysis should only be completed for proposed events that would graner >0.7 points
First, consider a range of directions in which the proposed disinformation could have been influenced by.
Then, research these directions in turn, using the tools at hand.
Finally, refine your proposed "trigger event" until it is specific, quantifiable and backed up by evidence.
Lets go through it step by step
+8
View File
@@ -0,0 +1,8 @@
Do the search results cited below
###TESEARCH###
Support the idea that the following happened:
###TECLAIM###
Respond with "CONFIDENCE", followed by : followed by a confidence score (VERYHIGH, HIGH, MEDIUM, LOW, VERYLOW) followed by : followed by the reason. Use no other words, just return the score and reason in format.
Dates can be off by a few days, that would still be valid
+1 -1
View File
@@ -8,7 +8,7 @@ export async function extractWebpageContent(url: string): Promise<string[]> {
const response = await backOff(async () => {
return await extractWebpageContentWorker(url);
}, {
numOfAttempts: 10,
numOfAttempts: 5,
startingDelay: 500,
timeMultiple: 2,
jitter: "full",
+1 -1
View File
@@ -5,7 +5,7 @@ set -e
run_agent () {
echo "Starting LangGraph agent..."
cd agent
npx @langchain/langgraph-cli@1.1.17 dev
npx @langchain/langgraph-cli dev
}
run_ensemble_service () {
@@ -9,7 +9,6 @@ datasets
# ROBERTA
scikit-learn
transformers[torch]
sentence_transformers
# Utils
numpy
+2 -14
View File
@@ -19,9 +19,6 @@ const MODE = process.env.MODE ?? "claim";
const MAX_CONCURRENCY = 5;
const OFFSET = parseInt(process.env.OFFSET ?? "0", 10);
const LIMIT = process.env.LIMIT ? parseInt(process.env.LIMIT, 10) : null;
const client = new Client({ apiUrl: API_URL });
@@ -167,19 +164,10 @@ async function processRecord(record: any): Promise<ResultRecord> {
async function main() {
console.log("Reading input file...");
const allRecords = await loadInputs();
const records = await loadInputs();
console.log(`Loaded ${allRecords.length} records`);
console.log(`Loaded ${records.length} records`);
const records = allRecords.slice(
OFFSET,
LIMIT !== null ? OFFSET + LIMIT : undefined
);
console.log(
`Processing ${records.length} records (offset=${OFFSET}, limit=${LIMIT ?? "∞"})`
);
fs.writeFileSync(OUTPUT_FILE, "", { flag: "a" });
const limit = pLimit(MAX_CONCURRENCY);
-119
View File
@@ -1,119 +0,0 @@
import json
import argparse
from urllib.parse import urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import WebDriverException, TimeoutException, StaleElementReferenceException
from tqdm import tqdm
def init_driver():
options = Options()
options.headless = True
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("--headless")
options.add_argument("--disable-blink-features=AutomationControlled")
options.add_argument("--window-size=1920,1080")
prefs = {
"profile.managed_default_content_settings.images": 2, # block images
"profile.default_content_setting_values.stylesheets": 2, # block CSS
"profile.managed_default_content_settings.cookies": 2, # optional
}
options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(options=options)
driver.set_page_load_timeout(30)
return driver
def is_root_url(url):
parsed = urlparse(url)
return parsed.path in ("", "/")
def is_404_page(driver):
"""Safely check for 404, handling stale elements."""
try:
title = driver.title.lower()
body_text = driver.find_element("tag name", "body").text.lower()
return "404" in title or "404" in body_text
except StaleElementReferenceException:
return False
except Exception:
return False
def check_url_selenium(url):
driver = None
try:
driver = init_driver()
driver.get(url)
# 404 check
if is_404_page(driver):
return False, "404 page detected"
# Root URL after redirects
final_url = driver.current_url
if is_root_url(final_url):
return False, f"Redirected to root URL ({final_url})"
return True, None
except (WebDriverException, TimeoutException) as e:
return False, str(e)
finally:
if driver:
driver.quit()
def process_event(event):
"""Process an event only if score > 0.4."""
score = event.get("score", 0)
if score <= 0.4:
return None, False, "Score too low"
url = event.get("Url")
if not url:
return None, False, "No URL"
is_valid, error_msg = check_url_selenium(url)
event["url_valid"] = is_valid
return url, is_valid, error_msg
def process_jsonl_file(file_path, max_workers=4):
invalid_urls = []
valid_urls = 0
# Gather events with score > 0.4
urls_to_check = []
with open(file_path, "r", encoding="utf-8") as f:
for line in f:
line_data = json.loads(line)
if line_data.get("status") != "success":
continue
for event in line_data.get("events", []):
if event.get("score", 0) > 0.4:
urls_to_check.append(event)
total_urls = len(urls_to_check)
# ThreadPoolExecutor with tqdm progress bar
with ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_event = {executor.submit(process_event, e): e for e in urls_to_check}
for future in tqdm(as_completed(future_to_event), total=total_urls, desc="Checking URLs"):
url, is_valid, error_msg = future.result()
if not is_valid and url:
invalid_urls.append((url, error_msg))
else:
valid_urls += 1
# Summary
if invalid_urls:
print("\nList of invalid URLs and reasons:")
for url, err in invalid_urls:
print(f"{url} --> {err}")
print("\n=== URL Validation Summary ===")
print(f"Total URLs processed: {total_urls}")
print(f"Valid URLs (loaded successfully): {valid_urls}")
print(f"Invalid URLs: {len(invalid_urls)}")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Validate URLs in JSONL file events using Selenium")
parser.add_argument("file_path", type=str, help="Path to the JSONL file")
parser.add_argument("--workers", type=int, default=4, help="Number of parallel Selenium workers")
args = parser.parse_args()
process_jsonl_file(args.file_path, max_workers=args.workers)
+1 -1
View File
@@ -27,7 +27,7 @@ DEFAULT_PARAMS = [
("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
]
NUM_RANDOM_CLAIMS = 2000
NUM_RANDOM_CLAIMS = 200
INPUT_FILE = "../../data/input.jsonl"
OUTPUT_FILE = "../../data/claims.json"