From cca3c42f5babdd57f9c8f5507fe4fd1b3af71209 Mon Sep 17 00:00:00 2001 From: William Jeynes Date: Sun, 22 Feb 2026 23:12:14 +0000 Subject: [PATCH] Fix longstanding bug in wrapper. Add handling to allow for duplicate events to be handled. Remove analysis script (will replace with more in-depth work in main frontend) --- run.sh | 8 +--- supporting/Wrapper/run.ts | 38 ++++++++++------- supporting/scorer/analyse.py | 81 ------------------------------------ supporting/scorer/display.py | 77 ++++++++++++++++++++++++++++------ supporting/scorer/rules.txt | 8 +++- 5 files changed, 95 insertions(+), 117 deletions(-) delete mode 100644 supporting/scorer/analyse.py diff --git a/run.sh b/run.sh index 6390df2..7ced1b7 100755 --- a/run.sh +++ b/run.sh @@ -32,21 +32,15 @@ run_wrapper () { npm run dev } -run_analysis () { - cd supporting/scorer - python analyse.py -} - case "$1" in agent) run_agent ;; ragas_service) run_ragas_service ;; frontend) run_frontend ;; fetch) run_fetch ;; wrapper) run_wrapper ;; - analysis) run_analysis ;; *) echo "Unknown command: $1" - echo "Usage: ./runproject [agent|ragas_service|frontend|fetch|wrapper|analysis]" + echo "Usage: ./runproject [agent|ragas_service|frontend|fetch|wrapper]" exit 1 ;; esac diff --git a/supporting/Wrapper/run.ts b/supporting/Wrapper/run.ts index 8e123ba..1987390 100644 --- a/supporting/Wrapper/run.ts +++ b/supporting/Wrapper/run.ts @@ -22,7 +22,7 @@ type Claim = { type ResultRecord = { documentUrl: string; text: string; - status: "success" | "error"; + status: "success" | "error" | "wrapper_crash"; normalized?: string, output?: any; error?: string; @@ -50,29 +50,37 @@ async function processClaim(claim: Claim): Promise { } } ); - - let lastMessage: any = null; + + let lastContent: any = null; for await (const chunk of stream) { // capture latest output - if (chunk?.data) { - lastMessage = chunk.data; - } + lastContent = chunk + } + + if (lastContent?.event != "error") { + return { + documentUrl: claim.documentUrl, + text: 
claim.text, + status: "success", + output: lastContent.data.messages?.at(-1) ?? "", + normalized: lastContent.data.normalizedClaim + }; + } + else { + return { + documentUrl: claim.documentUrl, + text: claim.text, + status: "error", + dump: lastContent + }; } - - return { - documentUrl: claim.documentUrl, - text: claim.text, - status: "success", - output: lastMessage.messages?.at(-1) ?? "", - normalized: lastMessage.normalizedClaim - }; } catch (err: any) { return { documentUrl: claim.documentUrl, text: claim.text, - status: "error", + status: "wrapper_crash", error: err?.message ?? String(err) }; } diff --git a/supporting/scorer/analyse.py b/supporting/scorer/analyse.py deleted file mode 100644 index c5cb6d5..0000000 --- a/supporting/scorer/analyse.py +++ /dev/null @@ -1,81 +0,0 @@ -import json -from statistics import mean - -# ------------------------------------------------------------ -# Load JSONL file -# ------------------------------------------------------------ -DATA_FILE = "../../data/results.jsonl" - -data = [] -with open(DATA_FILE, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if line: - data.append(json.loads(line)) - -# ------------------------------------------------------------ -# Extract events -# ------------------------------------------------------------ -all_events = [] -claims = [] - -for item in data: - if item.get("status") != "success": - continue - - claim_text = item.get("text", "") - outputs = item.get("output", []) - - for out in outputs: - if "content_parsed" in out: - events = out["content_parsed"] - - claims.append({ - "claim": claim_text, - "events": events - }) - - for ev in events: - score = ev["score"] - human = ev["human_score"] - - all_events.append({ - "claim": claim_text, - "event": ev["event"], - "reason": ev["reasoningWhyRelevant"], - "score": score, - "human_score": human, - "gap": abs(score - human), - }) - -# ------------------------------------------------------------ -# Compute metrics -# 
------------------------------------------------------------ -if not all_events: - raise ValueError("No events found in file.") - -avg_score = mean(e["score"] for e in all_events) -avg_diff = mean(e["gap"] for e in all_events) - -largest_gap_event = max(all_events, key=lambda x: x["gap"]) -worst_event = largest_gap_event - -worst_claim_data = next( - c for c in claims if c["claim"] == worst_event["claim"] -) - -# ------------------------------------------------------------ -# Output results -# ------------------------------------------------------------ -print(f"Average score: {avg_score:.4f}") -print(f"Average |human_score - score|: {avg_diff:.4f}") - -print("\nLargest gap event:") -print(f"Event: {largest_gap_event['event']}") -print(f"Score: {largest_gap_event['score']}") -print(f"Human score: {largest_gap_event['human_score']}") -print(f"Gap: {largest_gap_event['gap']:.4f}") - -print("\nWorst performing event and its claims:") -print(f"Claim: {worst_event['claim']}") -print(f"Worst Event: {worst_event['event']}") diff --git a/supporting/scorer/display.py b/supporting/scorer/display.py index 355fef8..dd4f4be 100644 --- a/supporting/scorer/display.py +++ b/supporting/scorer/display.py @@ -50,14 +50,45 @@ def load_data(file_path): return data + def save_data_clean(file_path, data): + merged = {} + + for entry in data: + # collect all content_parsed items from this entry + events = [] + for o in entry.get("output", []): + if "content_parsed" in o: + events.extend(o["content_parsed"]) + + doc_url = entry.get("documentUrl") + if not doc_url: + continue + + if doc_url not in merged: + # take the first object's other values + new_entry = entry.copy() + new_entry["events"] = events + + # remove unwanted fields safely + new_entry.pop("output", None) + new_entry.pop("status", None) + + merged[doc_url] = new_entry + else: + # merge events into existing entry + merged[doc_url]["events"].extend(events) + + # sort events by human_score + for entry in merged.values(): + 
entry["events"].sort( + key=lambda e: e.get("human_score", 0), + reverse=True # highest score first; remove if you want ascending + ) + + # write merged results with open(file_path, "w", encoding="utf-8") as f: - for entry in data: - for o in entry.get("output", []): - if "content_parsed" in o: - entry["events"] = o["content_parsed"] - del entry["output"] - del entry["status"] + for entry in merged.values(): f.write(json.dumps(entry, ensure_ascii=False) + "\n") def save_data(file_path, data): @@ -135,10 +166,22 @@ elif view == "Single Claim Random": unscored.append(c) if unscored: - unscored_entries.append({ - "entry": entry, - "claims": unscored - }) + # try to find an existing entry with same documentUrl + existing = next( + (item for item in unscored_entries + if item["entry"]["documentUrl"] == entry["documentUrl"]), + None + ) + + if existing: + # append new claims to existing entry + existing["claims"].extend(unscored) + else: + # create new object + unscored_entries.append({ + "entry": entry, + "claims": list(unscored) + }) if unscored_entries: st.session_state.current_claim = random.choice(unscored_entries) @@ -190,7 +233,7 @@ elif view == "Single Claim Random": f"**Reasoning:** {c.get('reasoningWhyRelevant')}" ) - cols = st.columns(5) + cols = st.columns(7) temp = "" @@ -209,8 +252,16 @@ elif view == "Single Claim Random": with cols[3]: a = st.checkbox("Story?", key = "Y" + str(idx) + c.get('event') ) temp += "STORY " if a else "" - + with cols[4]: + a = st.checkbox("Duplicate?", key = "D" + str(idx) + c.get('event') ) + temp += "DUPLICATE " if a else "" + + with cols[5]: + a = st.checkbox("Bias Shown", key = "B" + str(idx) + c.get('event') ) + temp += "BIAS " if a else "" + + with cols[6]: a = st.checkbox("Perfect", key = "P" + str(idx) + c.get('event') ) temp += "PERFECT " if a else "" @@ -262,6 +313,8 @@ elif view == "Single Claim Random": if (claim_obj["extra_info"] != ""): if (claim_obj["extra_info"].find("PERFECT") != -1): score = 1 + 
elif(claim_obj["extra_info"].find("DUPLICATE") != -1): + score = 0 else: score *= 0.5 diff --git a/supporting/scorer/rules.txt b/supporting/scorer/rules.txt index 291a2d6..2a73f80 100644 --- a/supporting/scorer/rules.txt +++ b/supporting/scorer/rules.txt @@ -1,4 +1,8 @@ 1. Proposed trigger events rewording the initial prompt should penalised -2. Proposed trigger events should be specific enough to create resonable analysis +2. Proposed trigger events should be specific enough to create reasonable analysis of that specific disinformation spread 3. Proposed trigger events should be time-specific. A trigger events that happened after the event is invalid -4. Proposed trigger events should not describe a story of propogation (discussions on debunk articles is invalid) \ No newline at end of file +4. Proposed trigger events should not describe a story of propagation (discussions on debunk articles is invalid) +5. Proposed trigger events should be sufficiently different from one another +6. Proposed trigger events must be free from bias, and backed up by reliable evidence +7. Proposed trigger events should be verifiable (general ideas of sentiment are not admissible) +8. Proposed trigger events should have a genuine causal link between the event and the disinformation \ No newline at end of file