Allow multiple source CSV files for normalisation. Implement real model node. Add normalization prompt. Implement normalization setup. Start on RAG retrieval functions
This commit is contained in:
+6
-4
@@ -8,14 +8,16 @@ import { createDummyModelNode } from "./nodes/dummyModel";
|
|||||||
import { verificationSetup } from "./nodes/verificationSetup";
|
import { verificationSetup } from "./nodes/verificationSetup";
|
||||||
import { dummyRagasMetrics } from "./nodes/dummyRagasMetrics";
|
import { dummyRagasMetrics } from "./nodes/dummyRagasMetrics";
|
||||||
import { produceRanking } from "./nodes/produceRanking";
|
import { produceRanking } from "./nodes/produceRanking";
|
||||||
|
import { createModelNode } from "./nodes/model";
|
||||||
|
|
||||||
const triggerEventToolNode = createToolNode(arithmeticToolsByName);
|
const triggerEventToolNode = createToolNode(arithmeticToolsByName);
|
||||||
const verificationToolNode = createToolNode(arithmeticToolsByName);
|
const verificationToolNode = createToolNode(arithmeticToolsByName);
|
||||||
|
|
||||||
const dummyTriggerEventModel = createDummyModelNode("Trigger Events of");
|
const dummyTriggerEventModel = createDummyModelNode("Trigger Events of");
|
||||||
const dummyNormalisationModel = createDummyModelNode("Normalised");
|
|
||||||
const dummyVerificationModel = createDummyModelNode("verification of");
|
const dummyVerificationModel = createDummyModelNode("verification of");
|
||||||
|
|
||||||
|
const normalisationModel = createModelNode([], "normalization.txt");
|
||||||
|
|
||||||
const triggerEventToolConditional = createToolConditional("triggerEventToolNode", verificationSetup.name);
|
const triggerEventToolConditional = createToolConditional("triggerEventToolNode", verificationSetup.name);
|
||||||
const verificationToolConditional = createToolConditional("verificationToolNode", produceRanking.name);
|
const verificationToolConditional = createToolConditional("verificationToolNode", produceRanking.name);
|
||||||
|
|
||||||
@@ -25,7 +27,7 @@ const agent = new StateGraph(MessagesState)
|
|||||||
//NODES
|
//NODES
|
||||||
|
|
||||||
.addNode(normalizationSetup.name, normalizationSetup)
|
.addNode(normalizationSetup.name, normalizationSetup)
|
||||||
.addNode("dummyNormalisationModel", dummyNormalisationModel)
|
.addNode("normalisationModel", normalisationModel)
|
||||||
|
|
||||||
.addNode("triggerEventToolNode", triggerEventToolNode)
|
.addNode("triggerEventToolNode", triggerEventToolNode)
|
||||||
.addNode("dummyTriggerEventModel", dummyTriggerEventModel)
|
.addNode("dummyTriggerEventModel", dummyTriggerEventModel)
|
||||||
@@ -37,8 +39,8 @@ const agent = new StateGraph(MessagesState)
|
|||||||
.addNode(produceRanking.name, produceRanking)
|
.addNode(produceRanking.name, produceRanking)
|
||||||
|
|
||||||
.addEdge(START, normalizationSetup.name)
|
.addEdge(START, normalizationSetup.name)
|
||||||
.addEdge(normalizationSetup.name, "dummyNormalisationModel")
|
.addEdge(normalizationSetup.name, "normalisationModel")
|
||||||
.addEdge("dummyNormalisationModel", "dummyTriggerEventModel")
|
.addEdge("normalisationModel", "dummyTriggerEventModel")
|
||||||
|
|
||||||
// @ts-expect-error
|
// @ts-expect-error
|
||||||
.addConditionalEdges("dummyTriggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name])
|
.addConditionalEdges("dummyTriggerEventModel", triggerEventToolConditional, ["triggerEventToolNode", verificationSetup.name])
|
||||||
|
|||||||
+24
-21
@@ -1,24 +1,27 @@
|
|||||||
// import { SystemMessage } from "@langchain/core/messages";
|
import { HumanMessage, SystemMessage } from "@langchain/core/messages";
|
||||||
// import { GraphNode } from "@langchain/langgraph";
|
import { GraphNode } from "@langchain/langgraph";
|
||||||
// import { MessagesState } from "../state";
|
import { MessagesState } from "../state";
|
||||||
// import { arithmeticTools } from "../tools/arithmetic";
|
import { ChatOpenAI } from "@langchain/openai"
|
||||||
// import { ChatOpenAI } from "@langchain/openai"
|
import { hydratePrompt } from "../prompts/hydratePrompt";
|
||||||
|
|
||||||
// const model = new ChatOpenAI({
|
export function createModelNode(tools: any, promptPath: string): GraphNode<typeof MessagesState> {
|
||||||
// model: "gpt-5-mini"
|
return async (state) => {
|
||||||
// });
|
const sysPrompt = hydratePrompt(promptPath, state.disinformationTitle)
|
||||||
|
|
||||||
// const modelWithTools = model.bindTools(arithmeticTools);
|
const model = new ChatOpenAI({
|
||||||
|
model: "gpt-5-mini"
|
||||||
|
});
|
||||||
|
const modelWithTools = model.bindTools(tools);
|
||||||
|
|
||||||
// export const llmCall: GraphNode<typeof MessagesState> = async (state) => {
|
const response = await modelWithTools.invoke([
|
||||||
// const response = await modelWithTools.invoke([
|
new SystemMessage(
|
||||||
// new SystemMessage(
|
sysPrompt
|
||||||
// "You are a helpful assistant tasked with performing arithmetic on a set of inputs. Any calculation, no matter how trivial, should be done with tools. Output the final answer with %%% on each side"
|
),
|
||||||
// ),
|
...state.messages,
|
||||||
// ...state.messages,
|
]);
|
||||||
// ]);
|
|
||||||
// return {
|
return {
|
||||||
// messages: [response],
|
messages: [response]
|
||||||
// llmCalls: 1,
|
};
|
||||||
// };
|
};
|
||||||
// };
|
}
|
||||||
@@ -1,9 +1,16 @@
|
|||||||
import { GraphNode } from "@langchain/langgraph";
|
import { GraphNode } from "@langchain/langgraph";
|
||||||
import { MessagesState } from "../state";
|
import { MessagesState } from "../state";
|
||||||
import { HumanMessage } from "@langchain/core/messages";
|
import { AIMessage, BaseMessage, HumanMessage } from "@langchain/core/messages";
|
||||||
|
import { calculateSimilarity } from "../tools/clan/retreiveExamples";
|
||||||
|
|
||||||
export const normalizationSetup: GraphNode<typeof MessagesState> = async (state) => {
|
export const normalizationSetup: GraphNode<typeof MessagesState> = async (state) => {
|
||||||
//TODO: Implement claim normalisation, using few shot prompting and CLAN Dataset
|
let similarityResults = await calculateSimilarity(state.disinformationTitle)
|
||||||
|
|
||||||
return { messages: [ new HumanMessage(state.disinformationTitle)] };
|
console.log(similarityResults)
|
||||||
|
|
||||||
|
let messages : BaseMessage[] = similarityResults.map((item) => {
|
||||||
|
return new AIMessage(`Original Claim: ${item.rawtext}. \n\n Normalised Claim: ${item.cleantext}`)
|
||||||
|
})
|
||||||
|
|
||||||
|
return { messages: messages, disinformationTitle: state.disinformationTitle };
|
||||||
};
|
};
|
||||||
Generated
+171
@@ -19,13 +19,21 @@
|
|||||||
"dotenv": "^17.2.3",
|
"dotenv": "^17.2.3",
|
||||||
"fs": "^0.0.1-security",
|
"fs": "^0.0.1-security",
|
||||||
"langchain": "^1.2.14",
|
"langchain": "^1.2.14",
|
||||||
|
"selenium-webdriver": "^4.40.0",
|
||||||
"winston": "^3.19.0"
|
"winston": "^3.19.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/node": "^25.1.0",
|
"@types/node": "^25.1.0",
|
||||||
|
"@types/selenium-webdriver": "^4.35.5",
|
||||||
"tsx": "^4.21.0"
|
"tsx": "^4.21.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@bazel/runfiles": {
|
||||||
|
"version": "6.5.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@bazel/runfiles/-/runfiles-6.5.0.tgz",
|
||||||
|
"integrity": "sha512-RzahvqTkfpY2jsDxo8YItPX+/iZ6hbiikw1YhE0bA9EKBR5Og8Pa6FHn9PO9M0zaXRVsr0GFQLKbB/0rzy9SzA==",
|
||||||
|
"license": "Apache-2.0"
|
||||||
|
},
|
||||||
"node_modules/@cfworker/json-schema": {
|
"node_modules/@cfworker/json-schema": {
|
||||||
"version": "4.1.1",
|
"version": "4.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/@cfworker/json-schema/-/json-schema-4.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/@cfworker/json-schema/-/json-schema-4.1.1.tgz",
|
||||||
@@ -1243,6 +1251,17 @@
|
|||||||
"undici-types": "~7.16.0"
|
"undici-types": "~7.16.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/selenium-webdriver": {
|
||||||
|
"version": "4.35.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/selenium-webdriver/-/selenium-webdriver-4.35.5.tgz",
|
||||||
|
"integrity": "sha512-wCQCjWmahRkUAO7S703UAvBFkxz4o/rjX4T2AOSWKXSi0sTQPsrXxR0GjtFUT0ompedLkYH4R5HO5Urz0hyeog==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/node": "*",
|
||||||
|
"@types/ws": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@types/triple-beam": {
|
"node_modules/@types/triple-beam": {
|
||||||
"version": "1.3.5",
|
"version": "1.3.5",
|
||||||
"resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.5.tgz",
|
"resolved": "https://registry.npmjs.org/@types/triple-beam/-/triple-beam-1.3.5.tgz",
|
||||||
@@ -1255,6 +1274,16 @@
|
|||||||
"integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==",
|
"integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/@types/ws": {
|
||||||
|
"version": "8.18.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz",
|
||||||
|
"integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"@types/node": "*"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/ansi-styles": {
|
"node_modules/ansi-styles": {
|
||||||
"version": "5.2.0",
|
"version": "5.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz",
|
||||||
@@ -1463,6 +1492,12 @@
|
|||||||
"simple-wcswidth": "^1.1.2"
|
"simple-wcswidth": "^1.1.2"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/core-util-is": {
|
||||||
|
"version": "1.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz",
|
||||||
|
"integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/csv-parse": {
|
"node_modules/csv-parse": {
|
||||||
"version": "6.1.0",
|
"version": "6.1.0",
|
||||||
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-6.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-6.1.0.tgz",
|
||||||
@@ -1753,6 +1788,12 @@
|
|||||||
"url": "https://github.com/sponsors/ljharb"
|
"url": "https://github.com/sponsors/ljharb"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/immediate": {
|
||||||
|
"version": "3.0.6",
|
||||||
|
"resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz",
|
||||||
|
"integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/inherits": {
|
"node_modules/inherits": {
|
||||||
"version": "2.0.4",
|
"version": "2.0.4",
|
||||||
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
|
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
|
||||||
@@ -1783,6 +1824,12 @@
|
|||||||
"url": "https://github.com/sponsors/sindresorhus"
|
"url": "https://github.com/sponsors/sindresorhus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/isarray": {
|
||||||
|
"version": "1.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
|
||||||
|
"integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/js-tiktoken": {
|
"node_modules/js-tiktoken": {
|
||||||
"version": "1.0.21",
|
"version": "1.0.21",
|
||||||
"resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.21.tgz",
|
"resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.21.tgz",
|
||||||
@@ -1798,6 +1845,48 @@
|
|||||||
"integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==",
|
"integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==",
|
||||||
"license": "ISC"
|
"license": "ISC"
|
||||||
},
|
},
|
||||||
|
"node_modules/jszip": {
|
||||||
|
"version": "3.10.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz",
|
||||||
|
"integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==",
|
||||||
|
"license": "(MIT OR GPL-3.0-or-later)",
|
||||||
|
"dependencies": {
|
||||||
|
"lie": "~3.3.0",
|
||||||
|
"pako": "~1.0.2",
|
||||||
|
"readable-stream": "~2.3.6",
|
||||||
|
"setimmediate": "^1.0.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/jszip/node_modules/readable-stream": {
|
||||||
|
"version": "2.3.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz",
|
||||||
|
"integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"core-util-is": "~1.0.0",
|
||||||
|
"inherits": "~2.0.3",
|
||||||
|
"isarray": "~1.0.0",
|
||||||
|
"process-nextick-args": "~2.0.0",
|
||||||
|
"safe-buffer": "~5.1.1",
|
||||||
|
"string_decoder": "~1.1.1",
|
||||||
|
"util-deprecate": "~1.0.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/jszip/node_modules/safe-buffer": {
|
||||||
|
"version": "5.1.2",
|
||||||
|
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
|
||||||
|
"integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/jszip/node_modules/string_decoder": {
|
||||||
|
"version": "1.1.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
|
||||||
|
"integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"safe-buffer": "~5.1.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/kuler": {
|
"node_modules/kuler": {
|
||||||
"version": "2.0.0",
|
"version": "2.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/kuler/-/kuler-2.0.0.tgz",
|
||||||
@@ -1857,6 +1946,15 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/lie": {
|
||||||
|
"version": "3.3.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz",
|
||||||
|
"integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"immediate": "~3.0.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/logform": {
|
"node_modules/logform": {
|
||||||
"version": "2.7.0",
|
"version": "2.7.0",
|
||||||
"resolved": "https://registry.npmjs.org/logform/-/logform-2.7.0.tgz",
|
"resolved": "https://registry.npmjs.org/logform/-/logform-2.7.0.tgz",
|
||||||
@@ -2062,12 +2160,24 @@
|
|||||||
"node": ">=8"
|
"node": ">=8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/pako": {
|
||||||
|
"version": "1.0.11",
|
||||||
|
"resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
|
||||||
|
"integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==",
|
||||||
|
"license": "(MIT AND Zlib)"
|
||||||
|
},
|
||||||
"node_modules/platform": {
|
"node_modules/platform": {
|
||||||
"version": "1.3.6",
|
"version": "1.3.6",
|
||||||
"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
|
"resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
|
||||||
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
|
"integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/process-nextick-args": {
|
||||||
|
"version": "2.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
|
||||||
|
"integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/protobufjs": {
|
"node_modules/protobufjs": {
|
||||||
"version": "7.5.4",
|
"version": "7.5.4",
|
||||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz",
|
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.4.tgz",
|
||||||
@@ -2162,6 +2272,31 @@
|
|||||||
"node": ">=10"
|
"node": ">=10"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/selenium-webdriver": {
|
||||||
|
"version": "4.40.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/selenium-webdriver/-/selenium-webdriver-4.40.0.tgz",
|
||||||
|
"integrity": "sha512-dU0QbnVKdPmoNP8OtMCazRdtU2Ux6Wl4FEpG1iwUbDeajJK1dBAywBLrC1D7YFRtogHzN96AbXBgBAJaarcysw==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/SeleniumHQ"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "opencollective",
|
||||||
|
"url": "https://opencollective.com/selenium"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@bazel/runfiles": "^6.5.0",
|
||||||
|
"jszip": "^3.10.1",
|
||||||
|
"tmp": "^0.2.5",
|
||||||
|
"ws": "^8.18.3"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">= 20.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/semver": {
|
"node_modules/semver": {
|
||||||
"version": "7.7.3",
|
"version": "7.7.3",
|
||||||
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz",
|
"resolved": "https://registry.npmjs.org/semver/-/semver-7.7.3.tgz",
|
||||||
@@ -2195,6 +2330,12 @@
|
|||||||
"url": "https://github.com/sponsors/sindresorhus"
|
"url": "https://github.com/sponsors/sindresorhus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/setimmediate": {
|
||||||
|
"version": "1.0.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz",
|
||||||
|
"integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/sharp": {
|
"node_modules/sharp": {
|
||||||
"version": "0.34.5",
|
"version": "0.34.5",
|
||||||
"resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz",
|
"resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz",
|
||||||
@@ -2303,6 +2444,15 @@
|
|||||||
"integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg==",
|
"integrity": "sha512-uuVGNWzgJ4yhRaNSiubPY7OjISw4sw4E5Uv0wbjp+OzcbmVU/rsT8ujgcXJhn9ypzsgr5vlzpPqP+MBBKcGvbg==",
|
||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
|
"node_modules/tmp": {
|
||||||
|
"version": "0.2.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/tmp/-/tmp-0.2.5.tgz",
|
||||||
|
"integrity": "sha512-voyz6MApa1rQGUxT3E+BK7/ROe8itEx7vD8/HEvt4xwXucvQ5G5oeEiHkmHZJuBO21RpOf+YYm9MOivj709jow==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=14.14"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/triple-beam": {
|
"node_modules/triple-beam": {
|
||||||
"version": "1.4.1",
|
"version": "1.4.1",
|
||||||
"resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.4.1.tgz",
|
"resolved": "https://registry.npmjs.org/triple-beam/-/triple-beam-1.4.1.tgz",
|
||||||
@@ -2423,6 +2573,27 @@
|
|||||||
"node": ">= 12.0.0"
|
"node": ">= 12.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/ws": {
|
||||||
|
"version": "8.19.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz",
|
||||||
|
"integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==",
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=10.0.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"bufferutil": "^4.0.1",
|
||||||
|
"utf-8-validate": ">=5.0.2"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"bufferutil": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"utf-8-validate": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/yallist": {
|
"node_modules/yallist": {
|
||||||
"version": "5.0.0",
|
"version": "5.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
|
||||||
|
|||||||
@@ -20,10 +20,12 @@
|
|||||||
"dotenv": "^17.2.3",
|
"dotenv": "^17.2.3",
|
||||||
"fs": "^0.0.1-security",
|
"fs": "^0.0.1-security",
|
||||||
"langchain": "^1.2.14",
|
"langchain": "^1.2.14",
|
||||||
|
"selenium-webdriver": "^4.40.0",
|
||||||
"winston": "^3.19.0"
|
"winston": "^3.19.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/node": "^25.1.0",
|
"@types/node": "^25.1.0",
|
||||||
|
"@types/selenium-webdriver": "^4.35.5",
|
||||||
"tsx": "^4.21.0"
|
"tsx": "^4.21.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,9 @@
|
|||||||
|
import fs from "fs";
|
||||||
|
|
||||||
|
export function hydratePrompt(path: string, replacement: string) {
|
||||||
|
// TODO: expand into full context-based replacement engine
|
||||||
|
|
||||||
|
let raw = fs.readFileSync("prompts/" + path, "utf-8");
|
||||||
|
|
||||||
|
return raw.replace("###", replacement)
|
||||||
|
}
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
You are part of an agent in a process to track state-sponsored disinformation
|
||||||
|
|
||||||
|
In order for the following debunk articles to be automatically referenced below an offensive post, the main offensive statement should be extracted, so it can be run in a semantic matcher
|
||||||
|
|
||||||
|
Some of the data comes from debunk datasets, please remove any references to that
|
||||||
|
|
||||||
|
Reduce this title from a disinformation tracking api to a short concise claim
|
||||||
|
|
||||||
|
Make all parts of the claim definite
|
||||||
|
For example:
|
||||||
|
Something could have potentially happened BECOMES something happened
|
||||||
|
DISINFORMATION CLAIM: something is NOT true BECOMES something is true
|
||||||
|
|
||||||
|
Relevant examples are included in preceding messages; use these as exact inspiration.
|
||||||
|
|
||||||
|
The claim to normalize is:
|
||||||
|
###
|
||||||
|
|
||||||
|
Produce no text other than the condensed claim.
|
||||||
@@ -3,21 +3,29 @@ import fs from "fs";
|
|||||||
import { pipeline, cos_sim } from "@huggingface/transformers";
|
import { pipeline, cos_sim } from "@huggingface/transformers";
|
||||||
import { logger } from "../../utils/logger";
|
import { logger } from "../../utils/logger";
|
||||||
|
|
||||||
const CSV_PATH = "./tools/clan/dev-eng.csv";
|
const CSV_PATHS = [
|
||||||
const CACHE_PATH = "./tools/clan/dev-eng.embeddings.json";
|
"./tools/clan/dev-eng.csv",
|
||||||
|
// "./tools/clan/test-eng.csv",
|
||||||
|
"./tools/clan/train-eng.csv",
|
||||||
|
];
|
||||||
|
|
||||||
|
const CACHE_PATH = "./tools/clan/dev.embeddings.json";
|
||||||
|
|
||||||
type EmbeddingCache = {
|
type EmbeddingCache = {
|
||||||
texts: string[];
|
rawtexts: string[];
|
||||||
|
cleantexts: string[];
|
||||||
embeddings: number[][];
|
embeddings: number[][];
|
||||||
};
|
};
|
||||||
|
|
||||||
export type NormalisedMatch = {
|
export type NormalisedMatch = {
|
||||||
index: number;
|
index: number;
|
||||||
score: number;
|
score: number;
|
||||||
text: string
|
rawtext: string;
|
||||||
|
cleantext: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
let texts: string[] = [];
|
let rawtexts: string[] = [];
|
||||||
|
let cleantexts: string[] = [];
|
||||||
let embeddings: number[][] = [];
|
let embeddings: number[][] = [];
|
||||||
|
|
||||||
const featureExtractor = await pipeline(
|
const featureExtractor = await pipeline(
|
||||||
@@ -33,20 +41,23 @@ async function loadOrBuildCache(): Promise<void> {
|
|||||||
const raw = fs.readFileSync(CACHE_PATH, "utf-8");
|
const raw = fs.readFileSync(CACHE_PATH, "utf-8");
|
||||||
const cache: EmbeddingCache = JSON.parse(raw);
|
const cache: EmbeddingCache = JSON.parse(raw);
|
||||||
|
|
||||||
texts = cache.texts;
|
rawtexts = cache.rawtexts;
|
||||||
|
cleantexts = cache.cleantexts;
|
||||||
embeddings = cache.embeddings.map(e => Array.from(e));
|
embeddings = cache.embeddings.map(e => Array.from(e));
|
||||||
|
|
||||||
logger.info("Loaded %s embeddings", embeddings.length);
|
logger.info("Loaded %s embeddings", embeddings.length);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.warn("Cache not found. Generating embeddings", embeddings.length);
|
logger.warn("Cache not found. Generating embeddings");
|
||||||
|
|
||||||
await buildCacheFromCSV();
|
for (const csvPath of CSV_PATHS) {
|
||||||
|
await buildCacheFromCSV(csvPath);
|
||||||
|
}
|
||||||
|
|
||||||
const cache: EmbeddingCache = {
|
const cache: EmbeddingCache = {
|
||||||
texts,
|
rawtexts,
|
||||||
|
cleantexts,
|
||||||
embeddings,
|
embeddings,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -55,10 +66,12 @@ async function loadOrBuildCache(): Promise<void> {
|
|||||||
logger.info("Cached %s embeddings", embeddings.length);
|
logger.info("Cached %s embeddings", embeddings.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function buildCacheFromCSV(): Promise<void> {
|
async function buildCacheFromCSV(csvPath: string): Promise<void> {
|
||||||
let count = 0;
|
let count = 0;
|
||||||
|
|
||||||
const stream = fs.createReadStream(CSV_PATH).pipe(parse());
|
logger.info("Processing CSV: %s", csvPath);
|
||||||
|
|
||||||
|
const stream = fs.createReadStream(csvPath).pipe(parse());
|
||||||
|
|
||||||
for await (const row of stream) {
|
for await (const row of stream) {
|
||||||
const text = row[0];
|
const text = row[0];
|
||||||
@@ -69,19 +82,27 @@ async function buildCacheFromCSV(): Promise<void> {
|
|||||||
normalize: true,
|
normalize: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
texts.push(text);
|
rawtexts.push(text);
|
||||||
|
cleantexts.push(row[1]);
|
||||||
const vector = Array.from(output.data as Float32Array);
|
const vector = Array.from(output.data as Float32Array);
|
||||||
embeddings.push(vector);
|
embeddings.push(vector);
|
||||||
|
|
||||||
|
|
||||||
count++;
|
count++;
|
||||||
if (count % 100 === 0) {
|
if (count % 100 === 0) {
|
||||||
logger.info("Processed %s", count);
|
logger.info("[%s] Processed %s rows", csvPath, count);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function calculateSimilarity(query: string,topK = 5): Promise<NormalisedMatch[]> {
|
logger.info("[%s] Finished (%s rows)", csvPath, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function calculateSimilarity(
|
||||||
|
query: string,
|
||||||
|
topK = 5
|
||||||
|
): Promise<NormalisedMatch[]> {
|
||||||
|
await loadOrBuildCache()
|
||||||
|
|
||||||
const queryEmbedding = await featureExtractor(query, {
|
const queryEmbedding = await featureExtractor(query, {
|
||||||
pooling: "mean",
|
pooling: "mean",
|
||||||
normalize: true,
|
normalize: true,
|
||||||
@@ -91,17 +112,9 @@ export async function calculateSimilarity(query: string,topK = 5): Promise<Norma
|
|||||||
.map((embedding, index) => ({
|
.map((embedding, index) => ({
|
||||||
index,
|
index,
|
||||||
score: cos_sim(embedding, queryEmbedding.data as number[]),
|
score: cos_sim(embedding, queryEmbedding.data as number[]),
|
||||||
text: texts[index],
|
rawtext: rawtexts[index],
|
||||||
|
cleantext: cleantexts[index]
|
||||||
}))
|
}))
|
||||||
.sort((a, b) => b.score - a.score)
|
.sort((a, b) => b.score - a.score)
|
||||||
.slice(0, topK);
|
.slice(0, topK);
|
||||||
}
|
}
|
||||||
|
|
||||||
//TEMP: testing code
|
|
||||||
await loadOrBuildCache();
|
|
||||||
|
|
||||||
const results = await calculateSimilarity(
|
|
||||||
"Wonderful to see London has taken a stand to defend freedom and the right to choose."
|
|
||||||
);
|
|
||||||
|
|
||||||
console.log(results);
|
|
||||||
|
|||||||
@@ -0,0 +1,29 @@
|
|||||||
|
import { Builder, Browser } from "selenium-webdriver";
|
||||||
|
import firefox from "selenium-webdriver/firefox";
|
||||||
|
|
||||||
|
async function extractWebpageContent(url: string) : Promise<string>{
|
||||||
|
const options = new firefox.Options();
|
||||||
|
options.addArguments("--headless");
|
||||||
|
|
||||||
|
let driver = await new Builder().forBrowser(Browser.FIREFOX).setFirefoxOptions(options).build()
|
||||||
|
try {
|
||||||
|
await driver.get(url)
|
||||||
|
await driver.wait(async () => {
|
||||||
|
return await driver.executeScript(
|
||||||
|
"return document.readyState === 'complete'"
|
||||||
|
);
|
||||||
|
}, 5000);
|
||||||
|
|
||||||
|
const readableText = await driver.executeScript(
|
||||||
|
"return document.body.innerText;"
|
||||||
|
) as string;
|
||||||
|
|
||||||
|
return readableText
|
||||||
|
} finally {
|
||||||
|
await driver.quit()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//TODO: Extract, rank snippets
|
||||||
|
|
||||||
|
//console.log(await extractWebpageContent("https://www.bbc.co.uk/news/live/c74wd01egvyt"))
|
||||||
Reference in New Issue
Block a user