diff --git a/agent/agent.ts b/agent/agent.ts index 9bb59aa..e0a30b5 100644 --- a/agent/agent.ts +++ b/agent/agent.ts @@ -11,13 +11,18 @@ import { loopEndConditional } from "./conditionals/loop_end"; import { sort } from "./nodes/sort"; import { triggerEventSetup } from "./nodes/triggerEventSetup"; import { createEnsembleNode } from "./nodes/ensembleNode"; +import { selfEvalSetup } from "./nodes/selfEvalSetup"; const triggerEventToolNode = createToolNode(triggerEventToolsByName); +const peToolNode = createToolNode(triggerEventToolsByName); const normalisationModel = createModelNode([], "normalization.txt"); const triggerEventModel = createModelNode(triggerEventToolsByName, "trigger.txt"); +const evaluationModel = createModelNode([], "eval.txt"); +const peModel = createModelNode(triggerEventToolsByName, "posteval.txt"); -const triggerEventToolConditional = createToolConditional("triggerEventToolNode", verificationSetup.name); +const triggerEventToolConditional = createToolConditional("triggerEventToolNode", selfEvalSetup.name); +const peToolConditional = createToolConditional("peToolNode", verificationSetup.name); const roNode = createEnsembleNode("ROBERTA", "roberta"); const flNode = createEnsembleNode("FLAN", "flan"); @@ -33,6 +38,12 @@ const agent = new StateGraph(MessagesState) .addNode("triggerEventToolNode", triggerEventToolNode) .addNode("triggerEventModel", triggerEventModel) + .addNode(selfEvalSetup.name, selfEvalSetup) + .addNode("evaluationModel", evaluationModel) + + .addNode("peToolNode", peToolNode) + .addNode("peModel", peModel) + .addNode(verificationSetup.name, verificationSetup) .addNode("roNode", roNode) @@ -49,9 +60,16 @@ const agent = new StateGraph(MessagesState) .addEdge(triggerEventSetup.name, "triggerEventModel") // @ts-expect-error - .addConditionalEdges("triggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name]) + .addConditionalEdges("triggerEventModel", triggerEventToolConditional, 
["triggerEventToolNode", selfEvalSetup.name]) .addEdge("triggerEventToolNode", "triggerEventModel") + .addEdge(selfEvalSetup.name, "evaluationModel") + .addEdge("evaluationModel", "peModel") + + // @ts-expect-error + .addConditionalEdges("peModel", peToolConditional, ["peToolNode", verificationSetup.name]) + .addEdge("peToolNode", "peModel") + .addEdge(verificationSetup.name, "roNode") .addEdge(verificationSetup.name, "flNode") .addEdge(verificationSetup.name, "lrNode") diff --git a/agent/nodes/selfEvalSetup.ts b/agent/nodes/selfEvalSetup.ts new file mode 100644 index 0000000..f1671fb --- /dev/null +++ b/agent/nodes/selfEvalSetup.ts @@ -0,0 +1,21 @@ +import { GraphNode } from "@langchain/langgraph"; +import { MessagesState, ProposedTriggerEventArray } from "../state"; +import { logger } from "../utils/logger"; +import { queryScraper } from "../tools/webSearch"; +import { rankAndDisplayData } from "../tools/triggerEventTools"; + +export const selfEvalSetup: GraphNode = async (state) => { + let genResponse = state.messages.at(-1)?.content.toString() ?? ""; + const parsed = ProposedTriggerEventArray.parse(JSON.parse(genResponse)); + + for (let i = 0; i < parsed.length; i++) { + const search = parsed[i].SearchQuery + const data = await queryScraper(search); + const output = await rankAndDisplayData(data, search); + + parsed[i].context = output; + } + + return { evalTriggerEvent: parsed }; + +}; \ No newline at end of file diff --git a/agent/nodes/verificationSetup.ts b/agent/nodes/verificationSetup.ts index 86fd0d4..a41e6c3 100644 --- a/agent/nodes/verificationSetup.ts +++ b/agent/nodes/verificationSetup.ts @@ -13,15 +13,6 @@ export const verificationSetup: GraphNode = async (state) let genResponse = state.messages.at(-1)?.content.toString() ?? 
""; const parsed = ProposedTriggerEventArray.parse(JSON.parse(genResponse)); - for (let i = 0; i < parsed.length; i++) { - const search = parsed[i].SearchQuery - // const data = await queryScraper(search); - // const output = await rankAndDisplayData(data, search); - - // parsed[i].context = output; - parsed[i].context = "NONE" - } - return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 }; } else { diff --git a/agent/prompts/eval.txt b/agent/prompts/eval.txt new file mode 100644 index 0000000..0a63314 --- /dev/null +++ b/agent/prompts/eval.txt @@ -0,0 +1,30 @@ +You are an impartial and meticulous evaluator assessing an LLM’s response based on key quality dimensions +of honesty and usefulness. Your goal is to provide structured feedback that can be used to improve the +response. +Evaluation task: please follow these steps carefully: +1. Analyze the response based on the three dimensions below. +2. Provide justifications first: write a brief explanation justifying your assessment for each dimension. +3. Assign scores after justification: assign a score from 1 (poor) to 10 (excellent) for each dimension +based on your justification. +4. Synthesize: provide a brief overall impression and the single most important suggestion for +improvement, keeping in mind that explanation/honesty is the top priority, then followed by guidance. +Critique dimensions (evaluate in this order): +(1) Specificity and usefulness: Can the proposed event be used to create a dataset of concrete events mapped to later +disinformation? +(2) Existence: Using the context provided, can the user be certain that the proposed trigger event actually happened? +(3) Causality: Is there a possible link from the proposed trigger event to the disinformation at hand? +Overall impression & key improvement suggestion: Briefly summarize the overall quality and state the +most critical change needed to improve the response. 
+ +Disinformation query: +###NTITLE### +Disinformation date: +###CDATE### + +LLM’s response to evaluate: +###LM### + +Provided context: +###VESEARCHES### + +Let's think it through step by step \ No newline at end of file diff --git a/agent/prompts/hydratePrompt.ts b/agent/prompts/hydratePrompt.ts index c8eb330..4366829 100644 --- a/agent/prompts/hydratePrompt.ts +++ b/agent/prompts/hydratePrompt.ts @@ -15,6 +15,10 @@ export async function hydratePrompt(path: string, state: any) : Promise raw = raw.replace("###LM###", state.messages.at(-1).content); } + if (raw.indexOf("###L2M###") != -1) { + raw = raw.replace("###L2M###", state.messages.at(-2).content); + } + if (raw.indexOf("###NTITLE###") != -1) { raw = raw.replace("###NTITLE###", state.normalizedClaim); } @@ -33,5 +37,12 @@ export async function hydratePrompt(path: string, state: any) : Promise raw = raw.replace("###TESEARCH###", output) } + if (raw.indexOf("###VESEARCHES###") != -1) { + const output = state.evalTriggerEvent + .map(e => e.context) + .join("\n") + raw = raw.replace("###VESEARCHES###", output) + } + return raw; } diff --git a/agent/prompts/posteval.txt b/agent/prompts/posteval.txt new file mode 100644 index 0000000..0e41171 --- /dev/null +++ b/agent/prompts/posteval.txt @@ -0,0 +1,40 @@ +You are an expert editor tasked with making targeted improvements to an existing LLM’s response based +on a specific critique with the primary goal of enhancing its score according to evaluation standards while +preserving its strengths. +Your revision task: generate a revised version of the existing response. Your goal is not to rewrite it +completely, but to make precise edits only to address the specific weaknesses highlighted in the critique. +Instructions for editing: +- Identify specific flaws: carefully read the critique and pinpoint the exact issues raised (e.g., unclear +explanation, vagueness, inappropriate responses, the key suggestion). 
+- Perform minimal targeted edits: modify only the necessary sentences or paragraphs within the existing +response to directly fix these identified flaws. +- Strongly preserve strengths: crucially keep all other parts of the existing response intact. Do not +rephrase, restructure, or remove sections that were not criticized or likely contributed positively to its +initial score. +- Ensure coherence: verify that your targeted edits integrate smoothly and do not introduce contradictions +or awkward phrasing. +Output requirements: +- It should feel like a slightly polished or corrected version of the existing response, not a fundamentally +different answer. +- Do not mention the critique, scores, or the editing process. The output should be clean JSON that passes validation checks + +Again, use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url,Date". +Use tools available to you if further information is required + +Add no new events, only improve the existing items + +Disinformation query: +###NTITLE### +Disinformation date: +###CDATE### + +LLM’s response to improve: +###L2M### + +Critique: +###LM### + +This contains specific feedback, justifications, scores from 1 to 10, and potentially a key improvement +suggestion. Focus on the justifications for low scores and the key suggestion. + +Let's think it through step by step \ No newline at end of file diff --git a/agent/state.ts b/agent/state.ts index b696559..5f70f7f 100644 --- a/agent/state.ts +++ b/agent/state.ts @@ -21,6 +21,7 @@ export const MessagesState = new StateSchema({ date: z.string(), messages: MessagesValue, proposedTriggerEvent: ProposedTriggerEventArray, + evalTriggerEvent: ProposedTriggerEventArray, proposedTriggerEventIndex: z.int(), normalizedClaim: z.string(), });