Add duplicate removal to the pipeline and run it in the sort step. Move the score logic into the robertaMetrics node.

This commit is contained in:
William Jeynes
2026-03-13 14:51:14 +00:00
parent d5c6cb444d
commit 0a7bb114d2
6 changed files with 105 additions and 7 deletions
+6 -1
View File
@@ -8,7 +8,12 @@ export const robertaMetrics: GraphNode<typeof MessagesState> = async (state) =>
const result = await evaluateWithRoberta({answer})
let score = 0;
if (result.validProb > result.invalidProb) {
score = 0.7 + ((result.validProb - result.invalidProb)*0.3);
}
return {
messages: [ new AIMessage("ROBERTA:" + result)]
messages: [ new AIMessage("ROBERTA:" + score)]
};
};
+6 -2
View File
@@ -1,10 +1,14 @@
import { GraphNode } from "@langchain/langgraph";
import { MessagesState } from "../state";
import { AIMessage } from "@langchain/core/messages";
import { removeDuplicates } from "../tools/removeDuplicates";
export const sort: GraphNode<typeof MessagesState> = async (state) => {
//not sure which will be better from API, just do both
let current = state.proposedTriggerEvent;
// remove duplicates
await removeDuplicates(current)
// not sure which will be better from API, just do both
current.sort((a, b) => ((b.score as number) ?? 0) - ((a.score as number) ?? 0));
+44
View File
@@ -0,0 +1,44 @@
import { pipeline, cos_sim } from "@huggingface/transformers";
// Feature-extraction pipeline shared by every call in this module.
// It is created once via top-level await, so importing this module does not
// complete until the pipeline promise has resolved.
// The binding is never reassigned, hence `const`.
const featureExtractor = await pipeline(
  "feature-extraction",
  "Xenova/all-MiniLM-L6-v2"
);
/**
 * Flags near-duplicate entries by overwriting the score of the weaker entry
 * in each similar pair with -1.
 *
 * Every entry's `Event` value is embedded with the module-level
 * `featureExtractor` (mean pooling, normalized), and each pair of entries is
 * compared with `cos_sim`. When the similarity exceeds `similarityThreshold`,
 * the entry with the lower score (a missing score counts as 0) is flagged;
 * on a tie the earlier entry is kept.
 *
 * Note that nothing is removed from the array: entries are only marked with
 * `score = -1`, and the array is mutated in place.
 *
 * @param state Array of entries; `Event` is read and `score` is read and
 *   written on each element.
 * @param similarityThreshold Similarity above which two entries are treated
 *   as duplicates. Defaults to 0.55.
 * @returns The same `state` array that was passed in.
 */
export async function removeDuplicates(state: any, similarityThreshold = 0.55) {
  // With fewer than two entries there is no pair to compare, so skip the
  // embedding call entirely.
  if (state.length < 2) return state;

  const outputs = await featureExtractor(
    state.map((entry: any) => entry.Event),
    { pooling: "mean", normalize: true }
  );

  // Copy each output row into a plain number array for cos_sim.
  const embeddings: number[][] = [];
  for (const row of outputs) {
    embeddings.push(Array.from(row.data));
  }

  const count = state.length;
  for (let i = 0; i < count; i++) {
    for (let j = i + 1; j < count; j++) {
      // Once the outer entry has been flagged, none of its remaining pairs
      // would be compared anyway, so stop scanning for it.
      if (state[i].score === -1) break;
      if (state[j].score === -1) continue;

      const sim = cos_sim(embeddings[i], embeddings[j]);
      if (sim > similarityThreshold) {
        const scoreI = state[i].score ?? 0;
        const scoreJ = state[j].score ?? 0;
        if (scoreJ > scoreI) {
          state[i].score = -1;
        } else {
          // The earlier entry wins when it scores higher or on a tie.
          state[j].score = -1;
        }
      }
    }
  }

  return state;
}
+3 -3
View File
@@ -4,15 +4,15 @@ export async function evaluateWithRoberta({
answer
}: {
answer: string;
}) {
}): Promise<{ validProb: number; invalidProb: number; }> {
const res = await axios.post("http://localhost:8000/evaluate", {
answer
});
// console.log(res.data)
const validProb = res.data["probabilities"][0][0]
const invalidProv = res.data["probabilities"][0][1]
const invalidProb = res.data["probabilities"][0][1]
return validProb > invalidProv ? 1 : 0;
return {validProb, invalidProb};
}
// let res = await evaluateWithRoberta({answer: "High-profile political downplaying of COVID-19 (examples: President Trump saying 'it will go away' in March–August 2020)"});