From cd2c8621e8263e6c17536cad8a08bfa24a3b2370 Mon Sep 17 00:00:00 2001 From: William Jeynes Date: Mon, 9 Feb 2026 20:25:36 +0000 Subject: [PATCH] FEAT: implement temp version of main tooling feedback loop --- agent/.env.example | 7 ++++- agent/agent.ts | 17 ++++++----- agent/conditionals/tool_end.ts | 6 ---- agent/nodes/model.ts | 4 +-- agent/nodes/tool.ts | 4 --- agent/prompts/hydratePrompt.ts | 9 ++++-- agent/prompts/normalization.txt | 2 +- agent/prompts/trigger.txt | 26 ++++++++++++++++ agent/tools/arithmetic.ts | 40 ------------------------- agent/tools/clan/retreiveExamples.ts | 1 + agent/tools/triggerEventTools.ts | 45 ++++++++++++++++++++++++++++ agent/tools/webSearch.ts | 21 +++++++++---- agent/tools/webpageFetch.ts | 9 ++++-- 13 files changed, 119 insertions(+), 72 deletions(-) create mode 100644 agent/prompts/trigger.txt delete mode 100644 agent/tools/arithmetic.ts create mode 100644 agent/tools/triggerEventTools.ts diff --git a/agent/.env.example b/agent/.env.example index 1528803..c1ca87d 100644 --- a/agent/.env.example +++ b/agent/.env.example @@ -1 +1,6 @@ -OPENAI_API_KEY=123456 \ No newline at end of file +OPENAI_API_KEY=123456 +LANGSMITH_TRACING=true +LANGSMITH_API_KEY=123456 +LANGSMITH_ENDPOINT=https://eu.api.smith.langchain.com +SCRAPER_INSTANCE=https://example.com +SCRAPER_PARAM_ANYTHING=else \ No newline at end of file diff --git a/agent/agent.ts b/agent/agent.ts index e16dd72..3213059 100644 --- a/agent/agent.ts +++ b/agent/agent.ts @@ -3,20 +3,21 @@ import { MessagesState } from "./state"; import { createToolNode } from "./nodes/tool"; import { createToolConditional } from "./conditionals/tool_end"; import { normalizationSetup } from "./nodes/normalizationSetup"; -import { arithmeticToolsByName } from "./tools/arithmetic" +import { triggerEventToolsByName } from "./tools/triggerEventTools" import { createDummyModelNode } from "./nodes/dummyModel"; import { verificationSetup } from "./nodes/verificationSetup"; import { dummyRagasMetrics } from 
"./nodes/dummyRagasMetrics"; import { produceRanking } from "./nodes/produceRanking"; import { createModelNode } from "./nodes/model"; -const triggerEventToolNode = createToolNode(arithmeticToolsByName); -const verificationToolNode = createToolNode(arithmeticToolsByName); +const triggerEventToolNode = createToolNode(triggerEventToolsByName); +const verificationToolNode = createToolNode([]); -const dummyTriggerEventModel = createDummyModelNode("Trigger Events of"); const dummyVerificationModel = createDummyModelNode("verification of"); const normalisationModel = createModelNode([], "normalization.txt"); +const triggerEventModel = createModelNode(triggerEventToolsByName, "trigger.txt"); + const triggerEventToolConditional = createToolConditional("triggerEventToolNode", verificationSetup.name); const verificationToolConditional = createToolConditional("verificationToolNode", produceRanking.name); @@ -30,7 +31,7 @@ const agent = new StateGraph(MessagesState) .addNode("normalisationModel", normalisationModel) .addNode("triggerEventToolNode", triggerEventToolNode) - .addNode("dummyTriggerEventModel", dummyTriggerEventModel) + .addNode("triggerEventModel", triggerEventModel) .addNode(verificationSetup.name, verificationSetup) .addNode("dummyVerificationModel", dummyVerificationModel) @@ -40,11 +41,11 @@ const agent = new StateGraph(MessagesState) .addEdge(START, normalizationSetup.name) .addEdge(normalizationSetup.name, "normalisationModel") - .addEdge("normalisationModel", "dummyTriggerEventModel") + .addEdge("normalisationModel", "triggerEventModel") // @ts-expect-error - .addConditionalEdges("dummyTriggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name]) - .addEdge("triggerEventToolNode", "dummyTriggerEventModel") + .addConditionalEdges("triggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name]) + .addEdge("triggerEventToolNode", "triggerEventModel") .addEdge(verificationSetup.name, 
"dummyVerificationModel") .addEdge(verificationSetup.name, dummyRagasMetrics.name) diff --git a/agent/conditionals/tool_end.ts b/agent/conditionals/tool_end.ts index 357180a..b1bf35f 100644 --- a/agent/conditionals/tool_end.ts +++ b/agent/conditionals/tool_end.ts @@ -6,12 +6,6 @@ export function createToolConditional(a: String, b: String): ConditionalEdgeRout // @ts-expect-error var genericToolConditional: ConditionalEdgeRouter = (state) => { const lastMessage = state.messages.at(-1); - - //STARTTEMP - if (lastMessage?.content?.toString().indexOf("qwe") != -1) { - return a - } - //ENDTEMP // Check if it's an AIMessage before accessing tool_calls if (!lastMessage || !AIMessage.isInstance(lastMessage)) { diff --git a/agent/nodes/model.ts b/agent/nodes/model.ts index a69c265..008d830 100644 --- a/agent/nodes/model.ts +++ b/agent/nodes/model.ts @@ -6,12 +6,12 @@ import { hydratePrompt } from "../prompts/hydratePrompt"; export function createModelNode(tools: any, promptPath: string): GraphNode { return async (state) => { - const sysPrompt = hydratePrompt(promptPath, state.disinformationTitle) + const sysPrompt = hydratePrompt(promptPath, state); const model = new ChatOpenAI({ model: "gpt-5-mini" }); - const modelWithTools = model.bindTools(tools); + const modelWithTools = model.bindTools(Object.values(tools)); const response = await modelWithTools.invoke([ new SystemMessage( diff --git a/agent/nodes/tool.ts b/agent/nodes/tool.ts index 3bdfe88..0a1f3f2 100644 --- a/agent/nodes/tool.ts +++ b/agent/nodes/tool.ts @@ -6,10 +6,6 @@ export function createToolNode(tools: any): GraphNode { return async (state) => { const lastMessage = state.messages.at(-1); - //STARTTEMP - return {messages: [new AIMessage("yeman")]} - //ENDTEMP - if (lastMessage == null || !AIMessage.isInstance(lastMessage)) { return { messages: [] }; } diff --git a/agent/prompts/hydratePrompt.ts b/agent/prompts/hydratePrompt.ts index ab80445..dbf4b21 100644 --- a/agent/prompts/hydratePrompt.ts +++ 
b/agent/prompts/hydratePrompt.ts @@ -1,9 +1,12 @@ import fs from "fs"; -export function hydratePrompt(path: string, replacement: string) { +export function hydratePrompt(path: string, state: any) { // TODO: expand into full context-based replacement engine let raw = fs.readFileSync("prompts/" + path, "utf-8"); - return raw.replace("###", replacement) -} \ No newline at end of file + raw = raw.replace("###TITLE###", state.disinformationTitle); + raw = raw.replace("###LM###", state.messages.at(-1).content); + + return raw; +} diff --git a/agent/prompts/normalization.txt b/agent/prompts/normalization.txt index 5c20d9c..1ac0f94 100644 --- a/agent/prompts/normalization.txt +++ b/agent/prompts/normalization.txt @@ -14,6 +14,6 @@ DISINFORMATION CLAIM: something is NOT true BECOMES something is true Relevent examples are included in preceeding messages, use these as exact inspiration. The claim to normalize is: -### +###TITLE### Produce no other text other than the condensed claim. \ No newline at end of file diff --git a/agent/prompts/trigger.txt b/agent/prompts/trigger.txt new file mode 100644 index 0000000..673607a --- /dev/null +++ b/agent/prompts/trigger.txt @@ -0,0 +1,26 @@ +You are an agent in a pipeline to analyse disinformation. +Once the information has been created as below, a dataset can be created to feed a model for prediction, which will improve pre-bunking efforts. + +There is a false disinformation claim circulating: +###LM### +Produce up-to 5 specific "trigger events" that happened that could have led to the spread of this disinformation. + +Remember the time frame of the disinformation campaign: {{CAMPAIGN_DATE}} +Include no information or events that would not have been available at the time. + +Produce no more text other than the json. + +Include a concise but specific search query that can be looked up on a search engine in order to allow for the verification. 
+ +Include a url to a source for your trigger event (not a web search, a specific url from a reputable source). Do not use OAI cite, include url as text in response. + +"The COVID-19 Pandemic","Escalation of Russia-Ukraine conflict" are too general, cannot be a trigger event + +If you are referencing another disinformation campaign, provide the specific narrative used, not just sentiment, and ensure it is sufficiently different from the claim we are analysing. + +Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url,IsItselfDisinformation". + + +Multiple tool invocations should be requested at once, if applicable. +Use your abilities to look between the lines and produce some insightful analysis, thinking both short and long term. +Let's go through it step by step \ No newline at end of file diff --git a/agent/tools/arithmetic.ts b/agent/tools/arithmetic.ts deleted file mode 100644 index f7aadf5..0000000 --- a/agent/tools/arithmetic.ts +++ /dev/null @@ -1,40 +0,0 @@ -import { tool } from "@langchain/core/tools"; -import * as z from "zod"; - - -// Define tools -const add = tool(({ a, b }) => a + b, { - name: "add", - description: "Add two numbers", - schema: z.object({ - a: z.number().describe("First number"), - b: z.number().describe("Second number"), - }), -}); - -const multiply = tool(({ a, b }) => a * b, { - name: "multiply", - description: "Multiply two numbers", - schema: z.object({ - a: z.number().describe("First number"), - b: z.number().describe("Second number"), - }), -}); - -const divide = tool(({ a, b }) => a / b, { - name: "divide", - description: "Divide two numbers", - schema: z.object({ - a: z.number().describe("First number"), - b: z.number().describe("Second number"), - }), -}); - -// Augment the LLM with tools -export const arithmeticToolsByName = { - [add.name]: add, - [multiply.name]: multiply, - [divide.name]: divide, -}; - -//const arithmeticTools = Object.values(arithmeticToolsByName); \ No newline at end of
file diff --git a/agent/tools/clan/retreiveExamples.ts b/agent/tools/clan/retreiveExamples.ts index 1522b77..5dcfdd9 100644 --- a/agent/tools/clan/retreiveExamples.ts +++ b/agent/tools/clan/retreiveExamples.ts @@ -3,6 +3,7 @@ import fs from "fs"; import { pipeline, cos_sim } from "@huggingface/transformers"; import { logger } from "../../utils/logger"; +//TODO, am getting duplicates, is it from the multi files? const CSV_PATHS = [ "./tools/clan/dev-eng.csv", // "./tools/clan/test-eng.csv", diff --git a/agent/tools/triggerEventTools.ts b/agent/tools/triggerEventTools.ts new file mode 100644 index 0000000..afca14f --- /dev/null +++ b/agent/tools/triggerEventTools.ts @@ -0,0 +1,45 @@ +import { tool } from "@langchain/core/tools"; +import * as z from "zod"; +import { queryScraper } from "./webSearch"; +import { extractWebpageContent } from "./webpageFetch"; + + +function rankAndDisplayData(data: string[]):string { + //TODO: hybrid re-ranking of the provided data + return data.join("\n") +} + +// Define tools +const webSearch = tool( + async ({ a }) => { + const data = await queryScraper(a); + return rankAndDisplayData(data); + }, + { + name: "WebSearch", + description: "Search DuckDuckGo for the provided query", + schema: z.object({ + a: z.string().describe("Search term"), + }), + } +); + +const openWebpage = tool( + async ({ a }) => { + const data = await extractWebpageContent(a); + return rankAndDisplayData(data); + }, + { + name: "OpenWebpage", + description: "Opens webpage and returns most relevent snippets", + schema: z.object({ + a: z.string().describe("URL"), + }), + } +); + +// Augment the LLM with tools +export const triggerEventToolsByName = { + [webSearch.name]: webSearch, + [openWebpage.name]: openWebpage +}; diff --git a/agent/tools/webSearch.ts b/agent/tools/webSearch.ts index 778c4c1..33bc508 100644 --- a/agent/tools/webSearch.ts +++ b/agent/tools/webSearch.ts @@ -1,6 +1,6 @@ import axios from "axios"; -export async function queryScraper(query: string) { 
+export async function queryScraper(query: string): Promise { const instance = process.env.SCRAPER_INSTANCE; if (!instance) { throw new Error("SCRAPER_INSTANCE environment variable is not set"); @@ -10,8 +10,8 @@ export async function queryScraper(query: string) { const url = `${instance}/api/v1/web`; - const params : Record = Object.entries(process.env) - .filter(([key, value]) => key.startsWith("SCRAPER_PARAM_") && value !== undefined) + const params: Record = Object.entries(process.env) + .filter(([key, value]) => key.startsWith("SCRAPER_PARAM_") && value !== undefined) .reduce((acc: Record, [key, value]) => { const paramName = key.replace(/^SCRAPER_PARAM_/, "").toLowerCase(); acc[paramName] = value!; @@ -35,12 +35,23 @@ export async function queryScraper(query: string) { const data = response.data; - // Basic validation if (data?.status !== "ok") { throw new Error(`API returned status: ${data?.status}`); } - return data; + // TEMP?: Convert API results to array of formatted strings. + + const context = data.web ?? []; + + const lines: string[] = context.map((item: any) => { + const title = (item.title ?? "").trim(); + const desc = (item.description ?? "").trim(); + const link = (item.url ?? 
"").trim(); + + return `- ${title}\n ${desc}\n ${link}`; + }); + + return lines; } diff --git a/agent/tools/webpageFetch.ts b/agent/tools/webpageFetch.ts index 3c74d90..8f11fc3 100644 --- a/agent/tools/webpageFetch.ts +++ b/agent/tools/webpageFetch.ts @@ -1,7 +1,7 @@ import { Builder, Browser } from "selenium-webdriver"; import firefox from "selenium-webdriver/firefox"; -async function extractWebpageContent(url: string) : Promise{ +export async function extractWebpageContent(url: string) : Promise{ const options = new firefox.Options(); options.addArguments("--headless"); @@ -18,7 +18,12 @@ async function extractWebpageContent(url: string) : Promise{ "return document.body.innerText;" ) as string; - return readableText + const filteredLines = readableText + .split(/\r?\n/) + .map(line => line.trim()) + .filter(line => line.split(/\s+/).length > 1); + + return filteredLines; } finally { await driver.quit() }