FEAT: implement temp version of main tooling feedback loop

This commit is contained in:
William Jeynes
2026-02-09 20:25:36 +00:00
parent 5841e8a922
commit cd2c8621e8
13 changed files with 119 additions and 72 deletions
+5
View File
@@ -1 +1,6 @@
OPENAI_API_KEY=123456
LANGSMITH_TRACING=true
LANGSMITH_API_KEY=123456
LANGSMITH_ENDPOINT=https://eu.api.smith.langchain.com
SCRAPER_INSTANCE=https://example.com
SCRAPER_PARAM_ANYTHING=else
+9 -8
View File
@@ -3,20 +3,21 @@ import { MessagesState } from "./state";
import { createToolNode } from "./nodes/tool";
import { createToolConditional } from "./conditionals/tool_end";
import { normalizationSetup } from "./nodes/normalizationSetup";
import { arithmeticToolsByName } from "./tools/arithmetic"
import { triggerEventToolsByName } from "./tools/triggerEventTools"
import { createDummyModelNode } from "./nodes/dummyModel";
import { verificationSetup } from "./nodes/verificationSetup";
import { dummyRagasMetrics } from "./nodes/dummyRagasMetrics";
import { produceRanking } from "./nodes/produceRanking";
import { createModelNode } from "./nodes/model";
const triggerEventToolNode = createToolNode(arithmeticToolsByName);
const verificationToolNode = createToolNode(arithmeticToolsByName);
const triggerEventToolNode = createToolNode(triggerEventToolsByName);
const verificationToolNode = createToolNode([]);
const dummyTriggerEventModel = createDummyModelNode("Trigger Events of");
const dummyVerificationModel = createDummyModelNode("verification of");
const normalisationModel = createModelNode([], "normalization.txt");
const triggerEventModel = createModelNode(triggerEventToolsByName, "trigger.txt");
const triggerEventToolConditional = createToolConditional("triggerEventToolNode", verificationSetup.name);
const verificationToolConditional = createToolConditional("verificationToolNode", produceRanking.name);
@@ -30,7 +31,7 @@ const agent = new StateGraph(MessagesState)
.addNode("normalisationModel", normalisationModel)
.addNode("triggerEventToolNode", triggerEventToolNode)
.addNode("dummyTriggerEventModel", dummyTriggerEventModel)
.addNode("triggerEventModel", triggerEventModel)
.addNode(verificationSetup.name, verificationSetup)
.addNode("dummyVerificationModel", dummyVerificationModel)
@@ -40,11 +41,11 @@ const agent = new StateGraph(MessagesState)
.addEdge(START, normalizationSetup.name)
.addEdge(normalizationSetup.name, "normalisationModel")
.addEdge("normalisationModel", "dummyTriggerEventModel")
.addEdge("normalisationModel", "triggerEventModel")
// @ts-expect-error
.addConditionalEdges("dummyTriggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name])
.addEdge("triggerEventToolNode", "dummyTriggerEventModel")
.addConditionalEdges("triggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name])
.addEdge("triggerEventToolNode", "triggerEventModel")
.addEdge(verificationSetup.name, "dummyVerificationModel")
.addEdge(verificationSetup.name, dummyRagasMetrics.name)
-6
View File
@@ -7,12 +7,6 @@ export function createToolConditional(a: String, b: String): ConditionalEdgeRout
var genericToolConditional: ConditionalEdgeRouter<typeof MessagesState, String> = (state) => {
const lastMessage = state.messages.at(-1);
//STARTTEMP
if (lastMessage?.content?.toString().indexOf("qwe") != -1) {
return a
}
//ENDTEMP
// Check if it's an AIMessage before accessing tool_calls
if (!lastMessage || !AIMessage.isInstance(lastMessage)) {
return b;
+2 -2
View File
@@ -6,12 +6,12 @@ import { hydratePrompt } from "../prompts/hydratePrompt";
export function createModelNode(tools: any, promptPath: string): GraphNode<typeof MessagesState> {
return async (state) => {
const sysPrompt = hydratePrompt(promptPath, state.disinformationTitle)
const sysPrompt = hydratePrompt(promptPath, state);
const model = new ChatOpenAI({
model: "gpt-5-mini"
});
const modelWithTools = model.bindTools(tools);
const modelWithTools = model.bindTools(Object.values(tools));
const response = await modelWithTools.invoke([
new SystemMessage(
-4
View File
@@ -6,10 +6,6 @@ export function createToolNode(tools: any): GraphNode<typeof MessagesState> {
return async (state) => {
const lastMessage = state.messages.at(-1);
//STARTTEMP
return {messages: [new AIMessage("yeman")]}
//ENDTEMP
if (lastMessage == null || !AIMessage.isInstance(lastMessage)) {
return { messages: [] };
}
+5 -2
View File
@@ -1,9 +1,12 @@
import fs from "fs";
/**
 * Loads a prompt template from the `prompts/` directory and fills in its
 * placeholders from the graph state.
 *
 * Supported placeholders:
 *  - `###TITLE###` is replaced with `state.disinformationTitle`
 *  - `###LM###` is replaced with the `content` of the last entry in `state.messages`
 *
 * Every occurrence of a placeholder is replaced, and the substituted text is
 * inserted verbatim (sequences such as `$&` or `$1` are not treated as
 * replacement patterns). A missing title, an empty/missing message list, or a
 * message without content is substituted with an empty string rather than
 * throwing or injecting the text "undefined".
 *
 * @param path  Template file name, relative to `prompts/`.
 * @param state Graph state; only `disinformationTitle` and `messages` are read.
 * @returns The template text with placeholders filled in.
 * @throws Whatever `fs.readFileSync` throws when the template cannot be read.
 */
export function hydratePrompt(path: string, state: any): string {
  // TODO: expand into full context-based replacement engine
  // NOTE(review): other placeholder syntaxes (e.g. `{{...}}`) are not handled here.
  const raw = fs.readFileSync("prompts/" + path, "utf-8");

  // Index manually instead of `.at(-1)` so an empty or absent list yields undefined.
  const messages = state?.messages;
  const lastMessage =
    Array.isArray(messages) && messages.length > 0
      ? messages[messages.length - 1]
      : undefined;

  // split/join replaces all occurrences and never interprets `$` patterns.
  const fill = (template: string, placeholder: string, value: unknown): string =>
    template.split(placeholder).join(String(value ?? ""));

  const withTitle = fill(raw, "###TITLE###", state?.disinformationTitle);
  return fill(withTitle, "###LM###", lastMessage?.content);
}
+1 -1
View File
@@ -14,6 +14,6 @@ DISINFORMATION CLAIM: something is NOT true BECOMES something is true
Relevant examples are included in preceding messages; use these as exact inspiration.
The claim to normalize is:
###
###TITLE###
Produce no other text other than the condensed claim.
+26
View File
@@ -0,0 +1,26 @@
You are an agent in a pipeline to analyse disinformation.
Once the information has been created as below, a dataset can be created to feed a model for prediction, which will improve pre-bunking efforts.
There is a false disinformation claim circulating:
###LM###
Produce up to 5 specific "trigger events" that happened that could have led to the spread of this disinformation.
Remember the time frame of the disinformation campaign: {{CAMPAIGN_DATE}}
Include no information or events that would not have been available at the time.
Produce no text other than the JSON.
Include a concise but specific search query that can be looked up on a search engine in order to allow for the verification.
Include a URL to a source for your trigger event (not a web search, a specific URL from a reputable source). Do not use OAI cite; include the URL as text in the response.
"The COVID-19 Pandemic","Escalation of Russia-Ukraine conflict" are too general, cannot be a trigger event
If you are referencing another disinformation campaign, provide the specific narrative used, not just sentiment, and ensure it is sufficiently different from the claim we are analysing.
Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url,IsItselfDisinformation".
Multiple tool invocations should be requested at once, if applicable.
Use your abilities to look between the lines and produce some insightful analysis, thinking both short and long term.
Let's go through it step by step.
-40
View File
@@ -1,40 +0,0 @@
import { tool } from "@langchain/core/tools";
import * as z from "zod";
// Define tools
// Tool "add": returns the sum of its two numeric inputs `a` and `b`.
const add = tool(
  (operands) => {
    const first = operands.a;
    const second = operands.b;
    return first + second;
  },
  {
    name: "add",
    description: "Add two numbers",
    schema: z.object({
      a: z.number().describe("First number"),
      b: z.number().describe("Second number"),
    }),
  },
);
// Tool "multiply": returns the product of its two numeric inputs `a` and `b`.
const multiply = tool(
  (operands) => {
    const first = operands.a;
    const second = operands.b;
    return first * second;
  },
  {
    name: "multiply",
    description: "Multiply two numbers",
    schema: z.object({
      a: z.number().describe("First number"),
      b: z.number().describe("Second number"),
    }),
  },
);
// Tool "divide": returns `a` divided by `b`.
// There is no zero guard: the result follows plain JavaScript number division.
const divide = tool(
  (operands) => {
    const numerator = operands.a;
    const denominator = operands.b;
    return numerator / denominator;
  },
  {
    name: "divide",
    description: "Divide two numbers",
    schema: z.object({
      a: z.number().describe("First number"),
      b: z.number().describe("Second number"),
    }),
  },
);
// Augment the LLM with tools
/**
 * Lookup of the arithmetic tools, keyed by each tool's own `name`
 * ("add", "multiply", "divide").
 */
export const arithmeticToolsByName = {
[add.name]: add,
[multiply.name]: multiply,
[divide.name]: divide,
};
//const arithmeticTools = Object.values(arithmeticToolsByName);
+1
View File
@@ -3,6 +3,7 @@ import fs from "fs";
import { pipeline, cos_sim } from "@huggingface/transformers";
import { logger } from "../../utils/logger";
//TODO, am getting duplicates, is it from the multi files?
const CSV_PATHS = [
"./tools/clan/dev-eng.csv",
// "./tools/clan/test-eng.csv",
+45
View File
@@ -0,0 +1,45 @@
import { tool } from "@langchain/core/tools";
import * as z from "zod";
import { queryScraper } from "./webSearch";
import { extractWebpageContent } from "./webpageFetch";
/**
 * Renders a list of text snippets as a single string, one snippet per line.
 * No ranking is performed yet; snippets keep the order they arrived in.
 *
 * @param data Snippets to display.
 * @returns The snippets separated by newline characters ("" for an empty list).
 */
function rankAndDisplayData(data: string[]): string {
  // TODO: hybrid re-ranking of the provided data
  const lineSeparator = "\n";
  const ranked = data;
  return ranked.join(lineSeparator);
}
// Define tools
// Tool "WebSearch": passes the search term `a` to the scraper and returns the
// formatted results as one newline-separated string.
const webSearch = tool(
  async ({ a: searchTerm }) => rankAndDisplayData(await queryScraper(searchTerm)),
  {
    name: "WebSearch",
    description: "Search DuckDuckGo for the provided query",
    schema: z.object({
      a: z.string().describe("Search term"),
    }),
  },
);
// Tool "OpenWebpage": extracts the text lines of the page at URL `a` and
// returns them as one newline-separated string.
const openWebpage = tool(
  async ({ a: pageUrl }) => rankAndDisplayData(await extractWebpageContent(pageUrl)),
  {
    name: "OpenWebpage",
    description: "Opens webpage and returns most relevent snippets",
    schema: z.object({
      a: z.string().describe("URL"),
    }),
  },
);
// Augment the LLM with tools
/**
 * Lookup of the trigger-event tools, keyed by each tool's own `name`
 * ("WebSearch", "OpenWebpage").
 */
export const triggerEventToolsByName = {
[webSearch.name]: webSearch,
[openWebpage.name]: openWebpage
};
+15 -4
View File
@@ -1,6 +1,6 @@
import axios from "axios";
export async function queryScraper(query: string) {
export async function queryScraper(query: string): Promise<string[]> {
const instance = process.env.SCRAPER_INSTANCE;
if (!instance) {
throw new Error("SCRAPER_INSTANCE environment variable is not set");
@@ -10,7 +10,7 @@ export async function queryScraper(query: string) {
const url = `${instance}/api/v1/web`;
const params : Record<string, string> = Object.entries(process.env)
const params: Record<string, string> = Object.entries(process.env)
.filter(([key, value]) => key.startsWith("SCRAPER_PARAM_") && value !== undefined)
.reduce((acc: Record<string, string>, [key, value]) => {
const paramName = key.replace(/^SCRAPER_PARAM_/, "").toLowerCase();
@@ -35,12 +35,23 @@ export async function queryScraper(query: string) {
const data = response.data;
// Basic validation
if (data?.status !== "ok") {
throw new Error(`API returned status: ${data?.status}`);
}
return data;
// TEMP?: Convert API results to array of formatted strings.
const context = data.web ?? [];
const lines: string[] = context.map((item: any) => {
const title = (item.title ?? "").trim();
const desc = (item.description ?? "").trim();
const link = (item.url ?? "").trim();
return `- ${title}\n ${desc}\n ${link}`;
});
return lines;
}
+7 -2
View File
@@ -1,7 +1,7 @@
import { Builder, Browser } from "selenium-webdriver";
import firefox from "selenium-webdriver/firefox";
async function extractWebpageContent(url: string) : Promise<string>{
export async function extractWebpageContent(url: string) : Promise<string[]>{
const options = new firefox.Options();
options.addArguments("--headless");
@@ -18,7 +18,12 @@ async function extractWebpageContent(url: string) : Promise<string>{
"return document.body.innerText;"
) as string;
return readableText
const filteredLines = readableText
.split(/\r?\n/)
.map(line => line.trim())
.filter(line => line.split(/\s+/).length > 1);
return filteredLines;
} finally {
await driver.quit()
}