3 Commits

Author SHA1 Message Date
William Jeynes d21a8b537e Add new accuracy results 2026-04-05 11:50:53 +01:00
William Jeynes 42cf4da794 Why no tool use? 2026-04-04 23:47:21 +01:00
William Jeynes f303ca9ea4 Switch to 4o mini 2026-04-04 23:11:39 +01:00
9 changed files with 36 additions and 34 deletions
+3 -14
View File
@@ -1,22 +1,9 @@
# AI models for identifying trigger events in disinformation analysis # AI models for identifying trigger events in disinformation analysis
Final Dissertation Submission Repository Final Dissertation Submission Repository
## Abstract ## Project Description
-- todo -- -- todo --
[Project Presentation](https://jillweynes.github.io/LLMsForDisinformationPrediction-GraphVizBuilt/presentation)
## Generated Database Link and Usage Experiments
Generated Dataset Link: [https://huggingface.co/datasets/WillJeynes/LLMsForDisinformationAnalysis-Dataset](https://huggingface.co/datasets/WillJeynes/LLMsForDisinformationAnalysis-Dataset)
Graph-Based Dataset Visualisation: [https://jillweynes.github.io/LLMsForDisinformationPrediction-GraphVizBuilt/](https://jillweynes.github.io/LLMsForDisinformationPrediction-GraphVizBuilt/)
Usage Experiments (incl graph visualisation) Source Code: [https://github.com/WillJeynes/LLMsForDisinformationPrediction](https://github.com/WillJeynes/LLMsForDisinformationPrediction)
# This repository:
## Solution Diagram ## Solution Diagram
-- todo -- -- todo --
@@ -26,6 +13,8 @@ Usage Experiments (incl graph visualisation) Source Code: [https://github.com/Wi
## Agent Refinement ## Agent Refinement
[See agent](/agent/) [See agent](/agent/)
## Generated Database Link and Usage Experiments
-- todo --
## Repository Structure ## Repository Structure
``` ```
+2 -3
View File
@@ -27,6 +27,5 @@ Experiments with different model types:
|-------------------------------|----------:|---------:| |-------------------------------|----------:|---------:|
| gpt-5-mini | 22/405 | 5.43 | | gpt-5-mini | 22/405 | 5.43 |
| gpt-5.4-mini | 29/278 | 10.43 | | gpt-5.4-mini | 29/278 | 10.43 |
| gpt-5.4-nano | 6/210 | 2.85 | | llama3.1:8b-instruct-q4_K_M | ? | ? |
| gpt-4.1-mini | 15/269 | 5.57 | | qwen3.5:9b | 0 | 0 |
| gpt-4o-mini | 27/287 | 9.407 |
+1 -1
View File
@@ -9,7 +9,7 @@ export function createModelNode(tools: any, promptPath: string): GraphNode<typeo
const sysPrompt = await hydratePrompt(promptPath, state); const sysPrompt = await hydratePrompt(promptPath, state);
const model = new ChatOpenAI({ const model = new ChatOpenAI({
model: "gpt-5-mini" model: "gpt-4o-mini"
}); });
const modelWithTools = model.bindTools(Object.values(tools)); const modelWithTools = model.bindTools(Object.values(tools));
+9
View File
@@ -0,0 +1,9 @@
Could the following real-world event:
###TECLAIM###
Be a trigger for the following disinformation:
###TITLE###
Respond with "RELATION", followed by : followed by a confidence score (VERYHIGH, HIGH, MEDIUM, LOW, VERYLOW) followed by : followed by the reason. Use no other words, just return the score and reason in format.
Ignore whether the event happened or not; purely consider the likelihood of causation
+9
View File
@@ -8,6 +8,10 @@ Produce up-to 5 specific "trigger events" that happened that could have led to t
Remember the time frame of the disinformation campaign: ###CDATE### Remember the time frame of the disinformation campaign: ###CDATE###
Include no information or events that would not have been available at the time. Include no information or events that would not have been available at the time.
You NEED TO use the tools available to you in order to produce up-to-date information on URL and search query, else you will be wrong and the analysis invalid.
You NEED TO use the web search and open URL tools to ensure page validity or else all work up to this point will have to be discarded.
Produce no more text other than the json. Produce no more text other than the json.
Include a concise but specific search query that can be looked up on a search engine in order to allow for the verification. Include a concise but specific search query that can be looked up on a search engine in order to allow for the verification.
@@ -26,4 +30,9 @@ Events will be reordered as part of processing, each statement must stand alone
The preceding messages act as examples of previous responses to potentially fictional events and scores given. The preceding messages act as examples of previous responses to potentially fictional events and scores given.
Analysis should only be completed for proposed events that would garner >0.7 points Analysis should only be completed for proposed events that would garner >0.7 points
This pipeline is running well past your knowledge cutoff.
Any URLs will change significantly over time.
You NEED TO use the tools available to you in order to produce up-to-date information on URL and search query, else you will be wrong and the analysis invalid.
You NEED TO use the web search and open URL tools to ensure page validity or else all work up to this point will have to be discarded.
Let's go through it step by step Let's go through it step by step
+8
View File
@@ -0,0 +1,8 @@
Do the search results cited below
###TESEARCH###
Support the idea that the following happened:
###TECLAIM###
Respond with "CONFIDENCE", followed by : followed by a confidence score (VERYHIGH, HIGH, MEDIUM, LOW, VERYLOW) followed by : followed by the reason. Use no other words, just return the score and reason in format.
Dates can be off by a few days, that would still be valid
+1 -1
View File
@@ -5,7 +5,7 @@ set -e
run_agent () { run_agent () {
echo "Starting LangGraph agent..." echo "Starting LangGraph agent..."
cd agent cd agent
npx @langchain/langgraph-cli@1.1.17 dev npx @langchain/langgraph-cli dev --host 127.0.0.1
} }
run_ensemble_service () { run_ensemble_service () {
+2 -14
View File
@@ -19,9 +19,6 @@ const MODE = process.env.MODE ?? "claim";
const MAX_CONCURRENCY = 5; const MAX_CONCURRENCY = 5;
const OFFSET = parseInt(process.env.OFFSET ?? "0", 10);
const LIMIT = process.env.LIMIT ? parseInt(process.env.LIMIT, 10) : null;
const client = new Client({ apiUrl: API_URL }); const client = new Client({ apiUrl: API_URL });
@@ -167,18 +164,9 @@ async function processRecord(record: any): Promise<ResultRecord> {
async function main() { async function main() {
console.log("Reading input file..."); console.log("Reading input file...");
const allRecords = await loadInputs(); const records = await loadInputs();
console.log(`Loaded ${allRecords.length} records`); console.log(`Loaded ${records.length} records`);
const records = allRecords.slice(
OFFSET,
LIMIT !== null ? OFFSET + LIMIT : undefined
);
console.log(
`Processing ${records.length} records (offset=${OFFSET}, limit=${LIMIT ?? "∞"})`
);
fs.writeFileSync(OUTPUT_FILE, "", { flag: "a" }); fs.writeFileSync(OUTPUT_FILE, "", { flag: "a" });
+1 -1
View File
@@ -27,7 +27,7 @@ DEFAULT_PARAMS = [
("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake ("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
] ]
NUM_RANDOM_CLAIMS = 2000 NUM_RANDOM_CLAIMS = 200
INPUT_FILE = "../../data/input.jsonl" INPUT_FILE = "../../data/input.jsonl"
OUTPUT_FILE = "../../data/claims.json" OUTPUT_FILE = "../../data/claims.json"