FEAT: implement temp version of main tooling feedback loop

2026-02-09 20:25:36 +00:00
parent 5841e8a922
commit cd2c8621e8
13 changed files with 119 additions and 72 deletions
@@ -1 +1,6 @@
-OPENAI_API_KEY=123456
+OPENAI_API_KEY=123456
 LANGSMITH_TRACING=true
 LANGSMITH_API_KEY=123456
 LANGSMITH_ENDPOINT=https://eu.api.smith.langchain.com
 SCRAPER_INSTANCE=https://example.com
 SCRAPER_PARAM_ANYTHING=else
@@ -3,20 +3,21 @@ import { MessagesState } from "./state";
 import { createToolNode } from "./nodes/tool";
 import { createToolConditional } from "./conditionals/tool_end";
 import { normalizationSetup } from "./nodes/normalizationSetup";
-import { arithmeticToolsByName } from "./tools/arithmetic"
+import { triggerEventToolsByName } from "./tools/triggerEventTools"
 import { createDummyModelNode } from "./nodes/dummyModel";
 import { verificationSetup } from "./nodes/verificationSetup";
 import { dummyRagasMetrics } from "./nodes/dummyRagasMetrics";
 import { produceRanking } from "./nodes/produceRanking";
 import { createModelNode } from "./nodes/model";
-const triggerEventToolNode = createToolNode(arithmeticToolsByName);
+const triggerEventToolNode = createToolNode(triggerEventToolsByName);
-const verificationToolNode = createToolNode(arithmeticToolsByName);
+const verificationToolNode = createToolNode([]);
 const dummyTriggerEventModel = createDummyModelNode("Trigger Events of");
 const dummyVerificationModel = createDummyModelNode("verification of");
 const normalisationModel = createModelNode([], "normalization.txt");
 const triggerEventModel = createModelNode(triggerEventToolsByName, "trigger.txt");
 const triggerEventToolConditional = createToolConditional("triggerEventToolNode", verificationSetup.name);
 const verificationToolConditional = createToolConditional("verificationToolNode", produceRanking.name);
@@ -30,7 +31,7 @@ const agent = new StateGraph(MessagesState)
  .addNode("normalisationModel", normalisationModel)
  .addNode("triggerEventToolNode", triggerEventToolNode)
-  .addNode("dummyTriggerEventModel", dummyTriggerEventModel)
+  .addNode("triggerEventModel", triggerEventModel)
  .addNode(verificationSetup.name, verificationSetup)
  .addNode("dummyVerificationModel", dummyVerificationModel)
@@ -40,11 +41,11 @@ const agent = new StateGraph(MessagesState)
  .addEdge(START, normalizationSetup.name)
  .addEdge(normalizationSetup.name, "normalisationModel")
-  .addEdge("normalisationModel", "dummyTriggerEventModel")
+  .addEdge("normalisationModel", "triggerEventModel")
  // @ts-expect-error
-  .addConditionalEdges("dummyTriggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name])
+  .addConditionalEdges("triggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name])
-  .addEdge("triggerEventToolNode", "dummyTriggerEventModel")
+  .addEdge("triggerEventToolNode", "triggerEventModel")
  .addEdge(verificationSetup.name, "dummyVerificationModel")
  .addEdge(verificationSetup.name, dummyRagasMetrics.name)
@@ -6,12 +6,6 @@ export function createToolConditional(a: String, b: String): ConditionalEdgeRout
  // @ts-expect-error
  var genericToolConditional: ConditionalEdgeRouter<typeof MessagesState, String> = (state) => {
    const lastMessage = state.messages.at(-1);
    //STARTTEMP
    if (lastMessage?.content?.toString().indexOf("qwe") != -1) {
      return a
    }
    //ENDTEMP
    // Check if it's an AIMessage before accessing tool_calls
    if (!lastMessage || !AIMessage.isInstance(lastMessage)) {
@@ -6,12 +6,12 @@ import { hydratePrompt } from "../prompts/hydratePrompt";
 export function createModelNode(tools: any, promptPath: string): GraphNode<typeof MessagesState> {
    return async (state) => {
-        const sysPrompt = hydratePrompt(promptPath, state.disinformationTitle)
+        const sysPrompt = hydratePrompt(promptPath, state);
        const model = new ChatOpenAI({
            model: "gpt-5-mini"
        });
-        const modelWithTools = model.bindTools(tools);
+        const modelWithTools = model.bindTools(Object.values(tools));
        const response = await modelWithTools.invoke([
            new SystemMessage(
@@ -6,10 +6,6 @@ export function createToolNode(tools: any): GraphNode<typeof MessagesState> {
  return async (state) => {
    const lastMessage = state.messages.at(-1);
    //STARTTEMP
    return {messages: [new AIMessage("yeman")]}
    //ENDTEMP
    if (lastMessage == null || !AIMessage.isInstance(lastMessage)) {
      return { messages: [] };
    }
@@ -1,9 +1,12 @@
 import fs from "fs";
-export function hydratePrompt(path: string, replacement: string) {
+export function hydratePrompt(path: string, state: any) {
    // TODO: expand into full context-based replacement engine
    let raw = fs.readFileSync("prompts/" + path, "utf-8");
-    return raw.replace("###", replacement)
+    raw = raw.replace("###TITLE###", state.disinformationTitle);
-}
+    raw = raw.replace("###LM###", state.messages.at(-1).content);
    return raw;
 }
@@ -14,6 +14,6 @@ DISINFORMATION CLAIM: something is NOT true BECOMES something is true
 Relevant examples are included in the preceding messages; use these as exact inspiration.
 The claim to normalize is:
-###
+###TITLE###
 Produce no text other than the condensed claim.
@@ -0,0 +1,26 @@
 You are an agent in a pipeline to analyse disinformation. 
 Once the information has been created as below, a dataset can be created to feed a model for prediction, which will improve pre-bunking efforts.  
 There is a false disinformation claim circulating:
 ###LM###
 Produce up to 5 specific "trigger events" that happened that could have led to the spread of this disinformation.
 Remember the time frame of the disinformation campaign: {{CAMPAIGN_DATE}}
 Include no information or events that would not have been available at the time. 
 Produce no text other than the JSON.
 Include a concise but specific search query that can be looked up on a search engine in order to allow for the verification. 
 Include a url to a source for your trigger event (not a web search, a specific url from a reputable source). Do not use OAI cite; include the url as plain text in the response.
 "The COVID-19 Pandemic","Escalation of Russia-Ukraine conflict" are too general, cannot be a trigger event
 If you are referencing another disinformation campaign, provide the specific narrative used, not just sentiment, and ensure it is sufficiently different from the claim we are analysing.
 Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url,IsItselfDisinformation".
 Multiple tool invocations should be requested at once, if applicable.
 Use your abilities to look between the lines and produce some insightful analysis, thinking both short and long term.
 Let's go through it step by step.
@@ -1,40 +0,0 @@
 import { tool } from "@langchain/core/tools";
 import * as z from "zod";
 // Define tools
 /** Tool named "add": returns the sum of its two numeric arguments `a` and `b`. */
 const add = tool(
   ({ a: augend, b: addend }) => {
     const sum = augend + addend;
     return sum;
   },
   {
     name: "add",
     description: "Add two numbers",
     schema: z.object({
       a: z.number().describe("First number"),
       b: z.number().describe("Second number"),
     }),
   }
 );
 /** Tool named "multiply": returns the product of its two numeric arguments `a` and `b`. */
 const multiply = tool(
   ({ a: multiplicand, b: multiplier }) => {
     const product = multiplicand * multiplier;
     return product;
   },
   {
     name: "multiply",
     description: "Multiply two numbers",
     schema: z.object({
       a: z.number().describe("First number"),
       b: z.number().describe("Second number"),
     }),
   }
 );
 /**
  * Tool named "divide": returns `a` divided by `b`.
  *
  * This is plain JavaScript division, so `b = 0` yields Infinity, -Infinity
  * or NaN instead of raising an error.
  */
 const divide = tool(
   ({ a: dividend, b: divisor }) => {
     const quotient = dividend / divisor;
     return quotient;
   },
   {
     name: "divide",
     description: "Divide two numbers",
     schema: z.object({
       a: z.number().describe("First number"),
       b: z.number().describe("Second number"),
     }),
   }
 );
 // Augment the LLM with tools
 /** Lookup table of the arithmetic tools, keyed by each tool's own `name`. */
 export const arithmeticToolsByName = Object.fromEntries(
   [add, multiply, divide].map((entry) => [entry.name, entry] as const)
 );
 //const arithmeticTools = Object.values(arithmeticToolsByName);
@@ -3,6 +3,7 @@ import fs from "fs";
 import { pipeline, cos_sim } from "@huggingface/transformers";
 import { logger } from "../../utils/logger";
 // TODO: results contain duplicates — is this caused by loading multiple CSV files?
 const CSV_PATHS = [
  "./tools/clan/dev-eng.csv",
  // "./tools/clan/test-eng.csv",
@@ -0,0 +1,45 @@
 import { tool } from "@langchain/core/tools";
 import * as z from "zod";
 import { queryScraper } from "./webSearch";
 import { extractWebpageContent } from "./webpageFetch";
 /**
  * Renders a list of text snippets as one newline-separated string.
  *
  * @param data - Snippets to display, in the order they should appear.
  * @returns The snippets joined with "\n" (an empty string for an empty list).
  */
 function rankAndDisplayData(data: string[]): string {
   // TODO: hybrid re-ranking of the provided data
   const separator = "\n";
   const rendered = data.join(separator);
   return rendered;
 }
 // Define tools
 /**
  * Tool named "WebSearch": forwards the search term (schema field `a`) to
  * `queryScraper` and returns the results formatted by `rankAndDisplayData`.
  */
 const webSearch = tool(
   async ({ a: searchTerm }) => rankAndDisplayData(await queryScraper(searchTerm)),
   {
     name: "WebSearch",
     description: "Search DuckDuckGo for the provided query",
     schema: z.object({
       a: z.string().describe("Search term"),
     }),
   }
 );
 /**
  * Tool named "OpenWebpage": passes the URL (schema field `a`) to
  * `extractWebpageContent` and returns the extracted lines formatted by
  * `rankAndDisplayData`.
  */
 const openWebpage = tool(
   async ({ a: pageUrl }) => rankAndDisplayData(await extractWebpageContent(pageUrl)),
   {
     name: "OpenWebpage",
     description: "Opens webpage and returns most relevent snippets",
     schema: z.object({
       a: z.string().describe("URL"),
     }),
   }
 );
 // Augment the LLM with tools
 /** Lookup table of the trigger-event tools, keyed by each tool's own `name`. */
 export const triggerEventToolsByName = Object.fromEntries(
   [webSearch, openWebpage].map((entry) => [entry.name, entry] as const)
 );
@@ -1,6 +1,6 @@
 import axios from "axios";
-export async function queryScraper(query: string) {
+export async function queryScraper(query: string): Promise<string[]> {
    const instance = process.env.SCRAPER_INSTANCE;
    if (!instance) {
        throw new Error("SCRAPER_INSTANCE environment variable is not set");
@@ -10,8 +10,8 @@ export async function queryScraper(query: string) {
    const url = `${instance}/api/v1/web`;
-    const params : Record<string, string> = Object.entries(process.env)
+    const params: Record<string, string> = Object.entries(process.env)
-        .filter(([key, value]) => key.startsWith("SCRAPER_PARAM_") && value !== undefined) 
+        .filter(([key, value]) => key.startsWith("SCRAPER_PARAM_") && value !== undefined)
        .reduce((acc: Record<string, string>, [key, value]) => {
            const paramName = key.replace(/^SCRAPER_PARAM_/, "").toLowerCase();
            acc[paramName] = value!;
@@ -35,12 +35,23 @@ export async function queryScraper(query: string) {
    const data = response.data;
    // Basic validation
    if (data?.status !== "ok") {
        throw new Error(`API returned status: ${data?.status}`);
    }
-    return data;
+    // TEMP?: Convert API results to array of formatted strings.
    const context = data.web ?? [];
    const lines: string[] = context.map((item: any) => {
        const title = (item.title ?? "").trim();
        const desc = (item.description ?? "").trim();
        const link = (item.url ?? "").trim();
        return `- ${title}\n  ${desc}\n  ${link}`;
    });
    return lines;
 }
@@ -1,7 +1,7 @@
 import { Builder, Browser } from "selenium-webdriver";
 import firefox from "selenium-webdriver/firefox";
-async function extractWebpageContent(url: string) : Promise<string>{
+export async function extractWebpageContent(url: string) : Promise<string[]>{
    const options = new firefox.Options();
    options.addArguments("--headless");
@@ -18,7 +18,12 @@ async function extractWebpageContent(url: string) : Promise<string>{
            "return document.body.innerText;"
        ) as string;
-        return readableText
+        const filteredLines = readableText
            .split(/\r?\n/)
            .map(line => line.trim())
            .filter(line => line.split(/\s+/).length > 1); 
        return filteredLines;
    } finally {
        await driver.quit()
    }