From 02eac0f55321068ed770ed1f0e54a120b29b09f8 Mon Sep 17 00:00:00 2001 From: William Jeynes Date: Mon, 9 Feb 2026 16:32:40 +0000 Subject: [PATCH] Allow multiple source CSV files for normalisation. Implement real model node. Add normalization prompt. Implement normalization setup. Start on RAG retrieval functions --- agent/agent.ts | 10 +- agent/nodes/model.ts | 45 +++---- agent/nodes/normalizationSetup.ts | 13 +- agent/package-lock.json | 171 +++++++++++++++++++++++++++ agent/package.json | 2 + agent/prompts/hydratePrompt.ts | 9 ++ agent/prompts/normalization.txt | 19 +++ agent/tools/clan/retreiveExamples.ts | 69 ++++++----- agent/tools/webpageFetch.ts | 29 +++++ 9 files changed, 311 insertions(+), 56 deletions(-) create mode 100644 agent/prompts/hydratePrompt.ts create mode 100644 agent/prompts/normalization.txt create mode 100644 agent/tools/webpageFetch.ts diff --git a/agent/agent.ts b/agent/agent.ts index c53eb69..e16dd72 100644 --- a/agent/agent.ts +++ b/agent/agent.ts @@ -8,14 +8,16 @@ import { createDummyModelNode } from "./nodes/dummyModel"; import { verificationSetup } from "./nodes/verificationSetup"; import { dummyRagasMetrics } from "./nodes/dummyRagasMetrics"; import { produceRanking } from "./nodes/produceRanking"; +import { createModelNode } from "./nodes/model"; const triggerEventToolNode = createToolNode(arithmeticToolsByName); const verificationToolNode = createToolNode(arithmeticToolsByName); const dummyTriggerEventModel = createDummyModelNode("Trigger Events of"); -const dummyNormalisationModel = createDummyModelNode("Normalised"); const dummyVerificationModel = createDummyModelNode("verification of"); +const normalisationModel = createModelNode([], "normalization.txt"); + const triggerEventToolConditional = createToolConditional("triggerEventToolNode", verificationSetup.name); const verificationToolConditional = createToolConditional("verificationToolNode", produceRanking.name); @@ -25,7 +27,7 @@ const agent = new StateGraph(MessagesState) 
//NODES .addNode(normalizationSetup.name, normalizationSetup) - .addNode("dummyNormalisationModel", dummyNormalisationModel) + .addNode("normalisationModel", normalisationModel) .addNode("triggerEventToolNode", triggerEventToolNode) .addNode("dummyTriggerEventModel", dummyTriggerEventModel) @@ -37,8 +39,8 @@ const agent = new StateGraph(MessagesState) .addNode(produceRanking.name, produceRanking) .addEdge(START, normalizationSetup.name) - .addEdge(normalizationSetup.name, "dummyNormalisationModel") - .addEdge("dummyNormalisationModel", "dummyTriggerEventModel") + .addEdge(normalizationSetup.name, "normalisationModel") + .addEdge("normalisationModel", "dummyTriggerEventModel") // @ts-expect-error .addConditionalEdges("dummyTriggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name]) diff --git a/agent/nodes/model.ts b/agent/nodes/model.ts index e7cc2a2..a69c265 100644 --- a/agent/nodes/model.ts +++ b/agent/nodes/model.ts @@ -1,24 +1,27 @@ -// import { SystemMessage } from "@langchain/core/messages"; -// import { GraphNode } from "@langchain/langgraph"; -// import { MessagesState } from "../state"; -// import { arithmeticTools } from "../tools/arithmetic"; -// import { ChatOpenAI } from "@langchain/openai" +import { HumanMessage, SystemMessage } from "@langchain/core/messages"; +import { GraphNode } from "@langchain/langgraph"; +import { MessagesState } from "../state"; +import { ChatOpenAI } from "@langchain/openai" +import { hydratePrompt } from "../prompts/hydratePrompt"; -// const model = new ChatOpenAI({ -// model: "gpt-5-mini" -// }); +export function createModelNode(tools: any, promptPath: string): GraphNode { + return async (state) => { + const sysPrompt = hydratePrompt(promptPath, state.disinformationTitle) -// const modelWithTools = model.bindTools(arithmeticTools); + const model = new ChatOpenAI({ + model: "gpt-5-mini" + }); + const modelWithTools = model.bindTools(tools); -// export const llmCall: GraphNode = async 
(state) => { -// const response = await modelWithTools.invoke([ -// new SystemMessage( -// "You are a helpful assistant tasked with performing arithmetic on a set of inputs. Any calculation, no matter how trivial, should be done with tools. Output the final answer with %%% on each side" -// ), -// ...state.messages, -// ]); -// return { -// messages: [response], -// llmCalls: 1, -// }; -// }; \ No newline at end of file + const response = await modelWithTools.invoke([ + new SystemMessage( + sysPrompt + ), + ...state.messages, + ]); + + return { + messages: [response] + }; + }; +} \ No newline at end of file diff --git a/agent/nodes/normalizationSetup.ts b/agent/nodes/normalizationSetup.ts index d68e350..58eac63 100644 --- a/agent/nodes/normalizationSetup.ts +++ b/agent/nodes/normalizationSetup.ts @@ -1,9 +1,16 @@ import { GraphNode } from "@langchain/langgraph"; import { MessagesState } from "../state"; -import { HumanMessage } from "@langchain/core/messages"; +import { AIMessage, BaseMessage, HumanMessage } from "@langchain/core/messages"; +import { calculateSimilarity } from "../tools/clan/retreiveExamples"; export const normalizationSetup: GraphNode = async (state) => { - //TODO: Implement claim normalisation, using few shot prompting and CLAN Dataset + let similarityResults = await calculateSimilarity(state.disinformationTitle) + + console.log(similarityResults) + + let messages : BaseMessage[] = similarityResults.map((item) => { + return new AIMessage(`Original Claim: ${item.rawtext}. 
\n\n Normalised Claim: ${item.cleantext}`) + }) - return { messages: [ new HumanMessage(state.disinformationTitle)] }; + return { messages: messages, disinformationTitle: state.disinformationTitle }; }; \ No newline at end of file diff --git a/agent/package-lock.json b/agent/package-lock.json index a2ba662..c68aa82 100644 --- a/agent/package-lock.json +++ b/agent/package-lock.json @@ -19,13 +19,21 @@ "dotenv": "^17.2.3", "fs": "^0.0.1-security", "langchain": "^1.2.14", + "selenium-webdriver": "^4.40.0", "winston": "^3.19.0" }, "devDependencies": { "@types/node": "^25.1.0", + "@types/selenium-webdriver": "^4.35.5", "tsx": "^4.21.0" } }, + "node_modules/@bazel/runfiles": { + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/@bazel/runfiles/-/runfiles-6.5.0.tgz", + "integrity": "sha512-RzahvqTkfpY2jsDxo8YItPX+/iZ6hbiikw1YhE0bA9EKBR5Og8Pa6FHn9PO9M0zaXRVsr0GFQLKbB/0rzy9SzA==", + "license": "Apache-2.0" + }, "node_modules/@cfworker/json-schema": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/@cfworker/json-schema/-/json-schema-4.1.1.tgz", @@ -1243,6 +1251,17 @@ "undici-types": "~7.16.0" } }, + "node_modules/@types/selenium-webdriver": { + "version": "4.35.5", + "resolved": "https://registry.npmjs.org/@types/selenium-webdriver/-/selenium-webdriver-4.35.5.tgz", + "integrity": "sha512-wCQCjWmahRkUAO7S703UAvBFkxz4o/rjX4T2AOSWKXSi0sTQPsrXxR0GjtFUT0ompedLkYH4R5HO5Urz0hyeog==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "@types/ws": "*" + } + }, "node_modules/@types/triple-beam": { "version": "1.3.5", "resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.5.tgz", @@ -1255,6 +1274,16 @@ "integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==", "license": "MIT" }, + "node_modules/@types/ws": { + "version": "8.18.1", + "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", + "integrity": 
"sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/ansi-styles": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", @@ -1463,6 +1492,12 @@ "simple-wcswidth": "^1.1.2" } }, + "node_modules/core-util-is": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", + "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==", + "license": "MIT" + }, "node_modules/csv-parse": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-6.1.0.tgz", @@ -1753,6 +1788,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==", + "license": "MIT" + }, "node_modules/inherits": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", @@ -1783,6 +1824,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", + "license": "MIT" + }, "node_modules/js-tiktoken": { "version": "1.0.21", "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.21.tgz", @@ -1798,6 +1845,48 @@ "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==", "license": "ISC" }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": 
"sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "license": "(MIT OR GPL-3.0-or-later)", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, + "node_modules/jszip/node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "license": "MIT", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/jszip/node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "license": "MIT" + }, + "node_modules/jszip/node_modules/string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, "node_modules/kuler": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz", @@ -1857,6 +1946,15 @@ } } }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "license": "MIT", + "dependencies": { + "immediate": "~3.0.5" + } + }, "node_modules/logform": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/logform/-/logform-2.7.0.tgz", @@ -2062,12 
+2160,24 @@ "node": ">=8" } }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", + "license": "(MIT AND Zlib)" + }, "node_modules/platform": { "version": "1.3.6", "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz", "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==", "license": "MIT" }, + "node_modules/process-nextick-args": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", + "license": "MIT" + }, "node_modules/protobufjs": { "version": "7.5.4", "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz", @@ -2162,6 +2272,31 @@ "node": ">=10" } }, + "node_modules/selenium-webdriver": { + "version": "4.40.0", + "resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.40.0.tgz", + "integrity": "sha512-dU0QbnVKdPmoNP8OtMCazRdtU2Ux6Wl4FEpG1iwUbDeajJK1dBAywBLrC1D7YFRtogHzN96AbXBgBAJaarcysw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/SeleniumHQ" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/selenium" + } + ], + "license": "Apache-2.0", + "dependencies": { + "@bazel/runfiles": "^6.5.0", + "jszip": "^3.10.1", + "tmp": "^0.2.5", + "ws": "^8.18.3" + }, + "engines": { + "node": ">= 20.0.0" + } + }, "node_modules/semver": { "version": "7.7.3", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz", @@ -2195,6 +2330,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": 
"sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==", + "license": "MIT" + }, "node_modules/sharp": { "version": "0.34.5", "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz", @@ -2303,6 +2444,15 @@ "integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg==", "license": "MIT" }, + "node_modules/tmp": { + "version": "0.2.5", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.5.tgz", + "integrity": "sha512-voyz6MApa1rQGUxT3E+BK7/ROe8itEx7vD8/HEvt4xwXucvQ5G5oeEiHkmHZJuBO21RpOf+YYm9MOivj709jow==", + "license": "MIT", + "engines": { + "node": ">=14.14" + } + }, "node_modules/triple-beam": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.4.1.tgz", @@ -2423,6 +2573,27 @@ "node": ">= 12.0.0" } }, + "node_modules/ws": { + "version": "8.19.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz", + "integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "node_modules/yallist": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz", diff --git a/agent/package.json b/agent/package.json index 82f096d..2cbf959 100644 --- a/agent/package.json +++ b/agent/package.json @@ -20,10 +20,12 @@ "dotenv": "^17.2.3", "fs": "^0.0.1-security", "langchain": "^1.2.14", + "selenium-webdriver": "^4.40.0", "winston": "^3.19.0" }, "devDependencies": { "@types/node": "^25.1.0", + "@types/selenium-webdriver": "^4.35.5", "tsx": "^4.21.0" } } diff --git a/agent/prompts/hydratePrompt.ts b/agent/prompts/hydratePrompt.ts new file mode 100644 index 0000000..ab80445 --- /dev/null +++ 
b/agent/prompts/hydratePrompt.ts @@ -0,0 +1,9 @@ +import fs from "fs"; + +export function hydratePrompt(path: string, replacement: string) { + // TODO: expand into full context-based replacement engine + + let raw = fs.readFileSync("prompts/" + path, "utf-8"); + + return raw.replace("###", replacement) +} \ No newline at end of file diff --git a/agent/prompts/normalization.txt b/agent/prompts/normalization.txt new file mode 100644 index 0000000..5c20d9c --- /dev/null +++ b/agent/prompts/normalization.txt @@ -0,0 +1,19 @@ +You are part of an agent in a process to track state-sponsored disinformation + +In order for the following debunk articles to be automatically referenced below an offensive post, the main offensive statement should be extracted, so it can be run in a semantic matcher + +Some of the data comes from debunk datasets, please remove any references to that + +Reduce this title from a disinformation tracking api to a short concise claim + +Make all parts of the claim definite +For example: +Something could have potentially happened BECOMES something happened +DISINFORMATION CLAIM: something is NOT true BECOMES something is true + +Relevant examples are included in preceding messages, use these as exact inspiration. + +The claim to normalize is: +### + +Produce no other text other than the condensed claim. 
\ No newline at end of file diff --git a/agent/tools/clan/retreiveExamples.ts b/agent/tools/clan/retreiveExamples.ts index e95f52a..1522b77 100644 --- a/agent/tools/clan/retreiveExamples.ts +++ b/agent/tools/clan/retreiveExamples.ts @@ -3,21 +3,29 @@ import fs from "fs"; import { pipeline, cos_sim } from "@huggingface/transformers"; import { logger } from "../../utils/logger"; -const CSV_PATH = "./tools/clan/dev-eng.csv"; -const CACHE_PATH = "./tools/clan/dev-eng.embeddings.json"; +const CSV_PATHS = [ + "./tools/clan/dev-eng.csv", + // "./tools/clan/test-eng.csv", + "./tools/clan/train-eng.csv", +]; + +const CACHE_PATH = "./tools/clan/dev.embeddings.json"; type EmbeddingCache = { - texts: string[]; + rawtexts: string[]; + cleantexts: string[]; embeddings: number[][]; }; export type NormalisedMatch = { - index: number; - score: number; - text: string + index: number; + score: number; + rawtext: string; + cleantext: string; }; -let texts: string[] = []; +let rawtexts: string[] = []; +let cleantexts: string[] = []; let embeddings: number[][] = []; const featureExtractor = await pipeline( @@ -33,20 +41,23 @@ async function loadOrBuildCache(): Promise { const raw = fs.readFileSync(CACHE_PATH, "utf-8"); const cache: EmbeddingCache = JSON.parse(raw); - texts = cache.texts; - + rawtexts = cache.rawtexts; + cleantexts = cache.cleantexts; embeddings = cache.embeddings.map(e => Array.from(e)); logger.info("Loaded %s embeddings", embeddings.length); return; } - logger.warn("Cache not found. Generating embeddings", embeddings.length); + logger.warn("Cache not found. 
Generating embeddings"); - await buildCacheFromCSV(); + for (const csvPath of CSV_PATHS) { + await buildCacheFromCSV(csvPath); + } const cache: EmbeddingCache = { - texts, + rawtexts, + cleantexts, embeddings, }; @@ -55,10 +66,12 @@ async function loadOrBuildCache(): Promise { logger.info("Cached %s embeddings", embeddings.length); } -async function buildCacheFromCSV(): Promise { +async function buildCacheFromCSV(csvPath: string): Promise { let count = 0; - const stream = fs.createReadStream(CSV_PATH).pipe(parse()); + logger.info("Processing CSV: %s", csvPath); + + const stream = fs.createReadStream(csvPath).pipe(parse()); for await (const row of stream) { const text = row[0]; @@ -69,19 +82,27 @@ async function buildCacheFromCSV(): Promise { normalize: true, }); - texts.push(text); + rawtexts.push(text); + cleantexts.push(row[1]); const vector = Array.from(output.data as Float32Array); embeddings.push(vector); count++; if (count % 100 === 0) { - logger.info("Processed %s", count); + logger.info("[%s] Processed %s rows", csvPath, count); } } + + logger.info("[%s] Finished (%s rows)", csvPath, count); } -export async function calculateSimilarity(query: string,topK = 5): Promise { +export async function calculateSimilarity( + query: string, + topK = 5 +): Promise { + await loadOrBuildCache() + const queryEmbedding = await featureExtractor(query, { pooling: "mean", normalize: true, @@ -91,17 +112,9 @@ export async function calculateSimilarity(query: string,topK = 5): Promise ({ index, score: cos_sim(embedding, queryEmbedding.data as number[]), - text: texts[index], + rawtext: rawtexts[index], + cleantext: cleantexts[index] })) .sort((a, b) => b.score - a.score) .slice(0, topK); -} - -//TEMP: testing code -await loadOrBuildCache(); - -const results = await calculateSimilarity( - "Wonderful to see London has taken a stand to defend freedom and the right to choose." 
-); - -console.log(results); +} \ No newline at end of file diff --git a/agent/tools/webpageFetch.ts b/agent/tools/webpageFetch.ts new file mode 100644 index 0000000..3c74d90 --- /dev/null +++ b/agent/tools/webpageFetch.ts @@ -0,0 +1,29 @@ +import { Builder, Browser } from "selenium-webdriver"; +import firefox from "selenium-webdriver/firefox"; + +async function extractWebpageContent(url: string) : Promise{ + const options = new firefox.Options(); + options.addArguments("--headless"); + + let driver = await new Builder().forBrowser(Browser.FIREFOX).setFirefoxOptions(options).build() + try { + await driver.get(url) + await driver.wait(async () => { + return await driver.executeScript( + "return document.readyState === 'complete'" + ); + }, 5000); + + const readableText = await driver.executeScript( + "return document.body.innerText;" + ) as string; + + return readableText + } finally { + await driver.quit() + } +} + +//TODO: Extract, rank snippets + +//console.log(await extractWebpageContent("https://www.bbc.co.uk/news/live/c74wd01egvyt")) \ No newline at end of file