5 Commits

Author SHA1 Message Date
William Jeynes 38ca7a3d34 I can't afford the full model lol. Use jsonrepair module to fix the agent's malformed JSON instead. 2026-03-26 15:37:14 +00:00
William Jeynes 38b6fb6a0e Use an even better model 2026-03-26 15:14:43 +00:00
William Jeynes c7cccb87c3 Update to 5.4 mini 2026-03-26 12:44:01 +00:00
William Jeynes fd0674e96a Add a chain of thought to the main prompt 2026-03-26 12:33:43 +00:00
William Jeynes 5e374a8bd6 Fix errors seen during longer runs: Selenium exceptions, insecure certificates, recursion limit exceeded, BM25 document corpus too small 2026-03-26 12:22:13 +00:00
8 changed files with 125 additions and 34 deletions
+1 -1
View File
@@ -9,7 +9,7 @@ export function createModelNode(tools: any, promptPath: string): GraphNode<typeo
const sysPrompt = await hydratePrompt(promptPath, state); const sysPrompt = await hydratePrompt(promptPath, state);
const model = new ChatOpenAI({ const model = new ChatOpenAI({
model: "gpt-5-mini" model: "gpt-5.4-mini"
}); });
const modelWithTools = model.bindTools(Object.values(tools)); const modelWithTools = model.bindTools(Object.values(tools));
+5 -3
View File
@@ -1,8 +1,7 @@
import { GraphNode } from "@langchain/langgraph"; import { GraphNode } from "@langchain/langgraph";
import { MessagesState, ProposedTriggerEventArray } from "../state"; import { MessagesState, ProposedTriggerEventArray } from "../state";
import { logger } from "../utils/logger"; import { logger } from "../utils/logger";
import { queryScraper } from "../tools/webSearch"; import { jsonrepair } from 'jsonrepair'
import { rankAndDisplayData } from "../tools/triggerEventTools";
export const verificationSetup: GraphNode<typeof MessagesState> = async (state) => { export const verificationSetup: GraphNode<typeof MessagesState> = async (state) => {
//this is kinda doing two things, but having two nodes for it seems overkill //this is kinda doing two things, but having two nodes for it seems overkill
@@ -11,7 +10,10 @@ export const verificationSetup: GraphNode<typeof MessagesState> = async (state)
logger.warn("No trigger events in memory, parsing") logger.warn("No trigger events in memory, parsing")
let genResponse = state.messages.at(-1)?.content.toString() ?? ""; let genResponse = state.messages.at(-1)?.content.toString() ?? "";
const parsed = ProposedTriggerEventArray.parse(JSON.parse(genResponse));
const repaired = jsonrepair(genResponse);
const parsed = ProposedTriggerEventArray.parse(JSON.parse(repaired));
for (let i = 0; i < parsed.length; i++) { for (let i = 0; i < parsed.length; i++) {
const search = parsed[i].SearchQuery const search = parsed[i].SearchQuery
+10
View File
@@ -20,6 +20,7 @@
"dotenv": "^17.2.3", "dotenv": "^17.2.3",
"exponential-backoff": "^3.1.3", "exponential-backoff": "^3.1.3",
"fs": "^0.0.1-security", "fs": "^0.0.1-security",
"jsonrepair": "^3.13.3",
"langchain": "^1.2.14", "langchain": "^1.2.14",
"selenium-webdriver": "^4.40.0", "selenium-webdriver": "^4.40.0",
"tldts": "^7.0.23", "tldts": "^7.0.23",
@@ -2075,6 +2076,15 @@
"integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==", "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==",
"license": "ISC" "license": "ISC"
}, },
"node_modules/jsonrepair": {
"version": "3.13.3",
"resolved": "https://registry.npmjs.org/jsonrepair/-/jsonrepair-3.13.3.tgz",
"integrity": "sha512-BTznj0owIt2CBAH/LTo7+1I5pMvl1e1033LRl/HUowlZmJOIhzC0zbX5bxMngLkfT4WnzPP26QnW5wMr2g9tsQ==",
"license": "ISC",
"bin": {
"jsonrepair": "bin/cli.js"
}
},
"node_modules/jszip": { "node_modules/jszip": {
"version": "3.10.1", "version": "3.10.1",
"resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz",
+1
View File
@@ -24,6 +24,7 @@
"dotenv": "^17.2.3", "dotenv": "^17.2.3",
"exponential-backoff": "^3.1.3", "exponential-backoff": "^3.1.3",
"fs": "^0.0.1-security", "fs": "^0.0.1-security",
"jsonrepair": "^3.13.3",
"langchain": "^1.2.14", "langchain": "^1.2.14",
"selenium-webdriver": "^4.40.0", "selenium-webdriver": "^4.40.0",
"tldts": "^7.0.23", "tldts": "^7.0.23",
+4
View File
@@ -26,4 +26,8 @@ Events will be reordered as part of processing, each statement must stand alone
The preceding messages act as examples of previous responses to potentially fictional events and scores given. The preceding messages act as examples of previous responses to potentially fictional events and scores given.
Analysis should only be completed for proposed events that would garner >0.7 points Analysis should only be completed for proposed events that would garner >0.7 points
First, consider a range of directions from which the proposed disinformation could have been influenced.
Then, research these directions in turn, using the tools at hand.
Finally, refine your proposed "trigger event" until it is specific, quantifiable and backed up by evidence.
Let's go through it step by step Let's go through it step by step
+16 -2
View File
@@ -15,6 +15,8 @@ const CACHE_PATH = "../data/csv.cache.json";
const JSONL_PATH = "../data/input.jsonl" const JSONL_PATH = "../data/input.jsonl"
const BM25_MIN_DOCS = 3;
type EmbeddingCache = { type EmbeddingCache = {
rawtexts: string[]; rawtexts: string[];
cleantexts: string[]; cleantexts: string[];
@@ -287,8 +289,20 @@ async function embedText(text: string): Promise<number[]> {
} }
function buildBM25(texts: string[]) { function buildBM25(texts: string[]) {
logger.info("Building BM25 index (%s docs)...", texts.length); let paddedTexts = texts;
if (texts.length < BM25_MIN_DOCS) {
const needed = BM25_MIN_DOCS - texts.length;
logger.error(
"Corpus too small for BM25 (%s docs, need %s+), padding with %s dummy doc(s)",
texts.length,
BM25_MIN_DOCS,
needed
);
paddedTexts = [...texts, ...Array(needed).fill("placeholder dummy document")];
}
logger.info("Building BM25 index (%s docs)...", paddedTexts.length);
const bm25 = bm25Factory(); const bm25 = bm25Factory();
bm25.defineConfig({ bm25.defineConfig({
@@ -302,7 +316,7 @@ function buildBM25(texts: string[]) {
nlp.tokens.removeWords, nlp.tokens.removeWords,
]); ]);
texts.forEach((text, i) => { paddedTexts.forEach((text, i) => {
bm25.addDoc({ text }, i); bm25.addDoc({ text }, i);
}); });
+87 -27
View File
@@ -1,32 +1,92 @@
import { Builder, Browser } from "selenium-webdriver"; import { Builder, Browser } from "selenium-webdriver";
import firefox from "selenium-webdriver/firefox"; import firefox from "selenium-webdriver/firefox";
import { backOff } from "exponential-backoff";
import { logger } from "../utils/logger";
export async function extractWebpageContent(url: string) : Promise<string[]>{ export async function extractWebpageContent(url: string): Promise<string[]> {
const options = new firefox.Options(); try {
options.addArguments("--headless"); const response = await backOff(async () => {
return await extractWebpageContentWorker(url);
let driver = await new Builder().forBrowser(Browser.FIREFOX).setFirefoxOptions(options).build() }, {
try { numOfAttempts: 5,
await driver.get(url) startingDelay: 500,
await driver.wait(async () => { timeMultiple: 2,
return await driver.executeScript( jitter: "full",
"return document.readyState === 'complete'" maxDelay: 50000,
); });
}, 5000); return response;
} catch (err: any) {
const readableText = await driver.executeScript( logger.error(`Failed out of retry loop for URL "${url}", returning placeholder to pipeline`);
"return document.body.innerText;" return ["API EXCEPTION"];
) as string; }
const filteredLines = readableText
.split(/\r?\n/)
.map(line => line.trim())
.filter(line => line.split(/\s+/).length > 1);
return filteredLines;
} finally {
await driver.quit()
}
} }
//console.log(await extractWebpageContent("https://www.bbc.co.uk/news/live/c74wd01egvyt")) async function extractWebpageContentWorker(url: string): Promise<string[]> {
let driver;
try {
const options = new firefox.Options();
options.addArguments("--headless");
driver = await new Builder()
.forBrowser(Browser.FIREFOX)
.setFirefoxOptions(options)
.build();
} catch (err: any) {
const desc = `Failed to launch Firefox driver: ${err.message}`;
logger.error(desc);
throw new Error(desc);
}
try {
try {
await driver.get(url);
} catch (err: any) {
const desc = `Failed to navigate to URL "${url}": ${err.message}`;
logger.error(desc);
throw new Error(desc);
}
try {
await driver.wait(async () => {
return await driver.executeScript(
"return document.readyState === 'complete'"
);
}, 5000);
} catch (err: any) {
logger.error(`Page load timed out for "${url}", attempting to read partial content: ${err.message}`);
// do not throw, attempt to read
}
let readableText: string;
try {
readableText = await driver.executeScript(
"return document.body.innerText;"
) as string;
} catch (err: any) {
const desc = `Failed to extract page text from "${url}": ${err.message}`;
logger.error(desc);
throw new Error(desc);
}
const filteredLines = readableText
.split(/\r?\n/)
.map(line => line.trim())
.filter(line => line.split(/\s+/).length > 1);
if (filteredLines.length === 0) {
const desc = `No content extracted from "${url}"`;
logger.error(desc);
throw new Error(desc);
}
return filteredLines;
} finally {
try {
await driver.quit();
} catch (err: any) {
logger.error(`Failed to quit Firefox driver cleanly: ${err.message}`);
}
}
}
// console.log(await extractWebpageContent("https://www.bbc.co.uk/news/live/c74wd01egvyt"))
// console.log(await extractWebpageContent("https://badcertificate.int.jeynes.uk/"))
+1 -1
View File
@@ -118,7 +118,7 @@ async function processRecord(record: any): Promise<ResultRecord> {
input: buildAgentInput(record), input: buildAgentInput(record),
streamMode: "values", streamMode: "values",
config: { config: {
recursion_limit: 50 recursion_limit: 100
} }
}); });