FEAT: implement temp version of main tooling feedback loop

2026-02-09 20:25:36 +00:00
parent 5841e8a922
commit cd2c8621e8
13 changed files with 119 additions and 72 deletions
@@ -1 +1,6 @@
 OPENAI_API_KEY=123456
+LANGSMITH_TRACING=true
+LANGSMITH_API_KEY=123456
+LANGSMITH_ENDPOINT=https://eu.api.smith.langchain.com
+SCRAPER_INSTANCE=https://example.com
+SCRAPER_PARAM_ANYTHING=else
@@ -3,20 +3,21 @@ import { MessagesState } from "./state";
 import { createToolNode } from "./nodes/tool";
 import { createToolConditional } from "./conditionals/tool_end";
 import { normalizationSetup } from "./nodes/normalizationSetup";
-import { arithmeticToolsByName } from "./tools/arithmetic"
+import { triggerEventToolsByName } from "./tools/triggerEventTools"
 import { createDummyModelNode } from "./nodes/dummyModel";
 import { verificationSetup } from "./nodes/verificationSetup";
 import { dummyRagasMetrics } from "./nodes/dummyRagasMetrics";
 import { produceRanking } from "./nodes/produceRanking";
 import { createModelNode } from "./nodes/model";

-const triggerEventToolNode = createToolNode(arithmeticToolsByName);
-const verificationToolNode = createToolNode(arithmeticToolsByName);
+const triggerEventToolNode = createToolNode(triggerEventToolsByName);
+const verificationToolNode = createToolNode([]);

-const dummyTriggerEventModel = createDummyModelNode("Trigger Events of");
 const dummyVerificationModel = createDummyModelNode("verification of");

 const normalisationModel = createModelNode([], "normalization.txt");
+const triggerEventModel = createModelNode(triggerEventToolsByName, "trigger.txt");
+

 const triggerEventToolConditional = createToolConditional("triggerEventToolNode", verificationSetup.name);
 const verificationToolConditional = createToolConditional("verificationToolNode", produceRanking.name);
@@ -30,7 +31,7 @@ const agent = new StateGraph(MessagesState)
  .addNode("normalisationModel", normalisationModel)
  
  .addNode("triggerEventToolNode", triggerEventToolNode)
-  .addNode("dummyTriggerEventModel", dummyTriggerEventModel)
+  .addNode("triggerEventModel", triggerEventModel)

  .addNode(verificationSetup.name, verificationSetup)
  .addNode("dummyVerificationModel", dummyVerificationModel)
@@ -40,11 +41,11 @@ const agent = new StateGraph(MessagesState)
  
  .addEdge(START, normalizationSetup.name)
  .addEdge(normalizationSetup.name, "normalisationModel")
-  .addEdge("normalisationModel", "dummyTriggerEventModel")
+  .addEdge("normalisationModel", "triggerEventModel")
  
  // @ts-expect-error
-  .addConditionalEdges("dummyTriggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name])
-  .addEdge("triggerEventToolNode", "dummyTriggerEventModel")
+  .addConditionalEdges("triggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name])
+  .addEdge("triggerEventToolNode", "triggerEventModel")
  
  .addEdge(verificationSetup.name, "dummyVerificationModel")
  .addEdge(verificationSetup.name, dummyRagasMetrics.name)
@@ -7,12 +7,6 @@ export function createToolConditional(a: String, b: String): ConditionalEdgeRout
  var genericToolConditional: ConditionalEdgeRouter<typeof MessagesState, String> = (state) => {
    const lastMessage = state.messages.at(-1);

-    //STARTTEMP
-    if (lastMessage?.content?.toString().indexOf("qwe") != -1) {
-      return a
-    }
-    //ENDTEMP
-
    // Check if it's an AIMessage before accessing tool_calls
    if (!lastMessage || !AIMessage.isInstance(lastMessage)) {
      return b;
@@ -6,12 +6,12 @@ import { hydratePrompt } from "../prompts/hydratePrompt";

 export function createModelNode(tools: any, promptPath: string): GraphNode<typeof MessagesState> {
    return async (state) => {
-        const sysPrompt = hydratePrompt(promptPath, state.disinformationTitle)
+        const sysPrompt = hydratePrompt(promptPath, state);

        const model = new ChatOpenAI({
            model: "gpt-5-mini"
        });
-        const modelWithTools = model.bindTools(tools);
+        const modelWithTools = model.bindTools(Object.values(tools));

        const response = await modelWithTools.invoke([
            new SystemMessage(
@@ -6,10 +6,6 @@ export function createToolNode(tools: any): GraphNode<typeof MessagesState> {
  return async (state) => {
    const lastMessage = state.messages.at(-1);

-    //STARTTEMP
-    return {messages: [new AIMessage("yeman")]}
-    //ENDTEMP
-
    if (lastMessage == null || !AIMessage.isInstance(lastMessage)) {
      return { messages: [] };
    }
@@ -1,9 +1,12 @@
 import fs from "fs";

-export function hydratePrompt(path: string, replacement: string) {
+/**
+ * Loads a prompt template from the `prompts/` directory and fills in its
+ * placeholders from the graph state.
+ *
+ * Supported placeholders:
+ *  - `###TITLE###` -> `state.disinformationTitle`
+ *  - `###LM###`    -> text content of the last entry in `state.messages`
+ *
+ * Missing values (no title, no messages yet) are substituted with an empty
+ * string instead of throwing or inserting the text "undefined".
+ *
+ * NOTE(review): trigger.txt also contains `{{CAMPAIGN_DATE}}`, which is not
+ * substituted here and will reach the model verbatim — confirm which state
+ * field should feed it.
+ *
+ * @param path  File name relative to `prompts/` (resolved against the cwd).
+ * @param state Graph state; only `disinformationTitle` and `messages` are read.
+ * @returns The hydrated prompt text.
+ * @throws If the template file cannot be read.
+ */
+export function hydratePrompt(path: string, state: any) {
     // TODO: expand into full context-based replacement engine
 
     let raw = fs.readFileSync("prompts/" + path, "utf-8");
 
-    return raw.replace("###", replacement)
+    // split/join substitutes every occurrence and inserts the value literally.
+    // String.prototype.replace with a string pattern only touches the first
+    // match and interprets "$&", "$$", "$`" and "$'" inside the replacement.
+    const substitute = (template: string, token: string, value: string): string =>
+        template.split(token).join(value);
+
+    // The prompt may be hydrated before any message exists, and message
+    // content may be a list of content blocks rather than a plain string.
+    const lastContent: unknown = state?.messages?.at(-1)?.content;
+    let lastMessageText: string;
+    if (typeof lastContent === "string") {
+        lastMessageText = lastContent;
+    } else if (Array.isArray(lastContent)) {
+        // presumably blocks shaped like { type: "text", text: string } — verify
+        lastMessageText = lastContent
+            .map((block) => (typeof block?.text === "string" ? block.text : ""))
+            .join("");
+    } else {
+        lastMessageText = lastContent == null ? "" : String(lastContent);
+    }
+
+    raw = substitute(raw, "###TITLE###", String(state?.disinformationTitle ?? ""));
+    raw = substitute(raw, "###LM###", lastMessageText);
+
+    return raw;
 }
@@ -14,6 +14,6 @@ DISINFORMATION CLAIM: something is NOT true BECOMES something is true
 Relevant examples are included in preceding messages, use these as exact inspiration.

 The claim to normalize is:
-###
+###TITLE###

 Produce no text other than the condensed claim.
@@ -0,0 +1,26 @@
+You are an agent in a pipeline to analyse disinformation. 
+Once the information has been created as below, a dataset can be created to feed a model for prediction, which will improve pre-bunking efforts.  
+
+There is a false disinformation claim circulating:
+###LM###
+Produce up to 5 specific "trigger events" that actually happened and could have led to the spread of this disinformation.
+
+Remember the time frame of the disinformation campaign: {{CAMPAIGN_DATE}}
+Include no information or events that would not have been available at the time. 
+
+Produce no text other than the JSON.
+
+Include a concise but specific search query that can be looked up on a search engine in order to allow for the verification. 
+
+Include a URL to a source for your trigger event (not a web search, a specific URL from a reputable source). Do not use OAI cite; include the URL as text in the response.
+
+Events such as "The COVID-19 Pandemic" or "Escalation of Russia-Ukraine conflict" are too general and cannot be trigger events.
+
+If you are referencing another disinformation campaign, provide the specific narrative used, not just sentiment, and ensure it is sufficiently different from the claim we are analysing.
+
+Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url,IsItselfDisinformation".
+
+
+Multiple tool invocations should be requested at once, if applicable.
+Use your abilities to look between the lines and produce some insightful analysis, thinking both short and long term.
+Let's go through it step by step.
@@ -1,40 +0,0 @@
-import { tool } from "@langchain/core/tools";
-import * as z from "zod";
-
-
-// Define tools
-const add = tool(({ a, b }) => a + b, {
-  name: "add",
-  description: "Add two numbers",
-  schema: z.object({
-    a: z.number().describe("First number"),
-    b: z.number().describe("Second number"),
-  }),
-});
-
-const multiply = tool(({ a, b }) => a * b, {
-  name: "multiply",
-  description: "Multiply two numbers",
-  schema: z.object({
-    a: z.number().describe("First number"),
-    b: z.number().describe("Second number"),
-  }),
-});
-
-const divide = tool(({ a, b }) => a / b, {
-  name: "divide",
-  description: "Divide two numbers",
-  schema: z.object({
-    a: z.number().describe("First number"),
-    b: z.number().describe("Second number"),
-  }),
-});
-
-// Augment the LLM with tools
-export const arithmeticToolsByName = {
-  [add.name]: add,
-  [multiply.name]: multiply,
-  [divide.name]: divide,
-};
-
-//const arithmeticTools = Object.values(arithmeticToolsByName);
@@ -3,6 +3,7 @@ import fs from "fs";
 import { pipeline, cos_sim } from "@huggingface/transformers";
 import { logger } from "../../utils/logger";

+// TODO: duplicate results are appearing — are they caused by loading multiple CSV files?
 const CSV_PATHS = [
  "./tools/clan/dev-eng.csv",
  // "./tools/clan/test-eng.csv",
@@ -0,0 +1,45 @@
+import { tool } from "@langchain/core/tools";
+import * as z from "zod";
+import { queryScraper } from "./webSearch";
+import { extractWebpageContent } from "./webpageFetch";
+
+
+/**
+ * Collapses a list of result strings into one newline-separated string.
+ * No ranking is applied yet: entries keep the order they were given in.
+ */
+function rankAndDisplayData(data: string[]):string {
+  //TODO: hybrid re-ranking of the provided data
+  return data.join("\n")
+}
+
+// Define tools
+// Tool "WebSearch": passes the search term to queryScraper and returns the
+// resulting lines as one newline-joined string. Errors thrown by queryScraper
+// are not caught here.
+// NOTE(review): the description names DuckDuckGo, but the backend is whatever
+// SCRAPER_INSTANCE points to — confirm the wording matches the deployment.
+const webSearch = tool(
+  async ({ a }) => {
+    const data = await queryScraper(a);
+    return rankAndDisplayData(data);
+  },
+  {
+    name: "WebSearch",
+    description: "Search DuckDuckGo for the provided query",
+    schema: z.object({
+      a: z.string().describe("Search term"),
+    }),
+  }
+);
+
+// Tool "OpenWebpage": passes the URL to extractWebpageContent and returns the
+// extracted text lines as one newline-joined string.
+// NOTE(review): the description promises "most relevent snippets", but
+// rankAndDisplayData does no ranking yet, so every extracted line is returned.
+const openWebpage = tool(
+  async ({ a }) => {
+    const data = await extractWebpageContent(a);
+    return rankAndDisplayData(data);
+  },
+  {
+    name: "OpenWebpage",
+    description: "Opens webpage and returns most relevent snippets",
+    schema: z.object({
+      a: z.string().describe("URL"),
+    }),
+  }
+);
+
+// Augment the LLM with tools
+// Trigger-event tools keyed by their tool name ("WebSearch", "OpenWebpage").
+export const triggerEventToolsByName = {
+  [webSearch.name]: webSearch,
+  [openWebpage.name]: openWebpage
+};
@@ -1,6 +1,6 @@
 import axios from "axios";

-export async function queryScraper(query: string) {
+export async function queryScraper(query: string): Promise<string[]> {
    const instance = process.env.SCRAPER_INSTANCE;
    if (!instance) {
        throw new Error("SCRAPER_INSTANCE environment variable is not set");
@@ -10,7 +10,7 @@ export async function queryScraper(query: string) {

    const url = `${instance}/api/v1/web`;

-    const params : Record<string, string> = Object.entries(process.env)
+    const params: Record<string, string> = Object.entries(process.env)
        .filter(([key, value]) => key.startsWith("SCRAPER_PARAM_") && value !== undefined)
        .reduce((acc: Record<string, string>, [key, value]) => {
            const paramName = key.replace(/^SCRAPER_PARAM_/, "").toLowerCase();
@@ -35,12 +35,23 @@ export async function queryScraper(query: string) {

    const data = response.data;

-    // Basic validation
    if (data?.status !== "ok") {
        throw new Error(`API returned status: ${data?.status}`);
    }

-    return data;
+    // TEMP?: Convert API results to array of formatted strings.
+
+    const context = data.web ?? [];
+
+    const lines: string[] = context.map((item: any) => {
+        const title = (item.title ?? "").trim();
+        const desc = (item.description ?? "").trim();
+        const link = (item.url ?? "").trim();
+
+        return `- ${title}\n  ${desc}\n  ${link}`;
+    });
+
+    return lines;
 }


@@ -1,7 +1,7 @@
 import { Builder, Browser } from "selenium-webdriver";
 import firefox from "selenium-webdriver/firefox";

-async function extractWebpageContent(url: string) : Promise<string>{
+export async function extractWebpageContent(url: string) : Promise<string[]>{
    const options = new firefox.Options();
    options.addArguments("--headless");

@@ -18,7 +18,12 @@ async function extractWebpageContent(url: string) : Promise<string>{
            "return document.body.innerText;"
        ) as string;

-        return readableText
+        const filteredLines = readableText
+            .split(/\r?\n/)
+            .map(line => line.trim())
+            .filter(line => line.split(/\s+/).length > 1); 
+        
+        return filteredLines;
    } finally {
        await driver.quit()
    }