Fix longstanding bug in wrapper. Add handling to allow duplicate events to be handled. Remove analysis script (will replace with more in-depth work in main frontend)

This commit is contained in:
William Jeynes
2026-02-22 23:12:14 +00:00
parent 4d92f14527
commit cca3c42f5b
5 changed files with 95 additions and 117 deletions
+1 -7
View File
@@ -32,21 +32,15 @@ run_wrapper () {
npm run dev
}
run_analysis () {
cd supporting/scorer
python analyse.py
}
case "$1" in
agent) run_agent ;;
ragas_service) run_ragas_service ;;
frontend) run_frontend ;;
fetch) run_fetch ;;
wrapper) run_wrapper ;;
analysis) run_analysis ;;
*)
echo "Unknown command: $1"
echo "Usage: ./runproject [agent|ragas_service|frontend|fetch|wrapper|analysis]"
echo "Usage: ./runproject [agent|ragas_service|frontend|fetch|wrapper]"
exit 1
;;
esac
+16 -8
View File
@@ -22,7 +22,7 @@ type Claim = {
type ResultRecord = {
documentUrl: string;
text: string;
status: "success" | "error";
status: "success" | "error" | "wrapper_crash";
normalized?: string,
output?: any;
error?: string;
@@ -52,27 +52,35 @@ async function processClaim(claim: Claim): Promise<ResultRecord> {
);
let lastMessage: any = null;
let lastContent: any = null;
for await (const chunk of stream) {
// capture latest output
if (chunk?.data) {
lastMessage = chunk.data;
}
lastContent = chunk
}
if (lastContent?.event != "error") {
return {
documentUrl: claim.documentUrl,
text: claim.text,
status: "success",
output: lastMessage.messages?.at(-1) ?? "",
normalized: lastMessage.normalizedClaim
output: lastContent.data.messages?.at(-1) ?? "",
normalized: lastContent.data.normalizedClaim
};
} catch (err: any) {
}
else {
return {
documentUrl: claim.documentUrl,
text: claim.text,
status: "error",
dump: lastContent
};
}
} catch (err: any) {
return {
documentUrl: claim.documentUrl,
text: claim.text,
status: "wrapper_crash",
error: err?.message ?? String(err)
};
}
-81
View File
@@ -1,81 +0,0 @@
"""Summarise how closely model event scores track human scores.

Reads the JSONL results file, gathers every fully scored event from records
whose status is "success", and prints the average model score, the average
absolute gap to the human score, and the event with the largest gap.
"""
import json
from statistics import mean

# ------------------------------------------------------------
# Load JSONL file
# ------------------------------------------------------------
DATA_FILE = "../../data/results.jsonl"


def load_results(file_path=DATA_FILE):
    """Return the list of JSON objects stored one per line in ``file_path``.

    Blank lines are skipped. Raises ``OSError`` if the file cannot be opened
    and ``json.JSONDecodeError`` if a non-blank line is not valid JSON.
    """
    records = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                records.append(json.loads(line))
    return records


# ------------------------------------------------------------
# Extract events
# ------------------------------------------------------------
def collect_events(records):
    """Flatten the scored events of all successful records into one list.

    Each returned dict carries the claim text, the event, its reasoning, the
    model ``score``, the ``human_score`` and their absolute ``gap``.

    Events that lack either ``score`` or ``human_score`` (i.e. not yet scored
    by a human) are skipped instead of raising ``KeyError``; output items that
    are not dicts are ignored.
    """
    all_events = []
    for item in records:
        if item.get("status") != "success":
            continue

        claim_text = item.get("text", "")
        # `or []` also covers an explicit null / empty-string output.
        for out in item.get("output") or []:
            if not isinstance(out, dict) or "content_parsed" not in out:
                continue
            for ev in out["content_parsed"]:
                score = ev.get("score")
                human = ev.get("human_score")
                if score is None or human is None:
                    # A gap cannot be computed for an unscored event.
                    continue
                all_events.append({
                    "claim": claim_text,
                    "event": ev.get("event"),
                    "reason": ev.get("reasoningWhyRelevant"),
                    "score": score,
                    "human_score": human,
                    "gap": abs(score - human),
                })
    return all_events


# ------------------------------------------------------------
# Compute metrics
# ------------------------------------------------------------
def summarise(all_events):
    """Compute the aggregate metrics over ``all_events``.

    Returns a dict with ``avg_score``, ``avg_diff`` (mean absolute gap) and
    ``largest_gap_event`` (the first event with the maximum gap).

    Raises ``ValueError`` if ``all_events`` is empty.
    """
    if not all_events:
        raise ValueError("No events found in file.")

    return {
        "avg_score": mean(e["score"] for e in all_events),
        "avg_diff": mean(e["gap"] for e in all_events),
        "largest_gap_event": max(all_events, key=lambda x: x["gap"]),
    }


# ------------------------------------------------------------
# Output results
# ------------------------------------------------------------
def main():
    """Load the results file and print the summary report to stdout."""
    metrics = summarise(collect_events(load_results(DATA_FILE)))
    largest_gap_event = metrics["largest_gap_event"]
    # The "worst" event is, by definition here, the one with the largest gap.
    worst_event = largest_gap_event

    print(f"Average score: {metrics['avg_score']:.4f}")
    print(f"Average |human_score - score|: {metrics['avg_diff']:.4f}")

    print("\nLargest gap event:")
    print(f"Event: {largest_gap_event['event']}")
    print(f"Score: {largest_gap_event['score']}")
    print(f"Human score: {largest_gap_event['human_score']}")
    print(f"Gap: {largest_gap_event['gap']:.4f}")

    print("\nWorst performing event and its claims:")
    print(f"Claim: {worst_event['claim']}")
    print(f"Worst Event: {worst_event['event']}")


if __name__ == "__main__":
    main()
+59 -6
View File
@@ -50,14 +50,45 @@ def load_data(file_path):
return data
def save_data_clean(file_path, data):
with open(file_path, "w", encoding="utf-8") as f:
merged = {}
for entry in data:
# collect all content_parsed items from this entry
events = []
for o in entry.get("output", []):
if "content_parsed" in o:
entry["events"] = o["content_parsed"]
del entry["output"]
del entry["status"]
events.extend(o["content_parsed"])
doc_url = entry.get("documentUrl")
if not doc_url:
continue
if doc_url not in merged:
# take the first object's other values
new_entry = entry.copy()
new_entry["events"] = events
# remove unwanted fields safely
new_entry.pop("output", None)
new_entry.pop("status", None)
merged[doc_url] = new_entry
else:
# merge events into existing entry
merged[doc_url]["events"].extend(events)
# sort events by human_score
for entry in merged.values():
entry["events"].sort(
key=lambda e: e.get("human_score", 0),
reverse=True # highest score first; remove if you want ascending
)
# write merged results
with open(file_path, "w", encoding="utf-8") as f:
for entry in merged.values():
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
def save_data(file_path, data):
@@ -135,9 +166,21 @@ elif view == "Single Claim Random":
unscored.append(c)
if unscored:
# try to find an existing entry with same documentUrl
existing = next(
(item for item in unscored_entries
if item["entry"]["documentUrl"] == entry["documentUrl"]),
None
)
if existing:
# append new claims to existing entry
existing["claims"].extend(unscored)
else:
# create new object
unscored_entries.append({
"entry": entry,
"claims": unscored
"claims": list(unscored)
})
if unscored_entries:
@@ -190,7 +233,7 @@ elif view == "Single Claim Random":
f"**Reasoning:** {c.get('reasoningWhyRelevant')}"
)
cols = st.columns(5)
cols = st.columns(7)
temp = ""
@@ -211,6 +254,14 @@ elif view == "Single Claim Random":
temp += "STORY " if a else ""
with cols[4]:
a = st.checkbox("Duplicate?", key = "D" + str(idx) + c.get('event') )
temp += "DUPLICATE " if a else ""
with cols[5]:
a = st.checkbox("Bias Shown", key = "B" + str(idx) + c.get('event') )
temp += "BIAS " if a else ""
with cols[6]:
a = st.checkbox("Perfect", key = "P" + str(idx) + c.get('event') )
temp += "PERFECT " if a else ""
@@ -262,6 +313,8 @@ elif view == "Single Claim Random":
if (claim_obj["extra_info"] != ""):
if (claim_obj["extra_info"].find("PERFECT") != -1):
score = 1
elif(claim_obj["extra_info"].find("DUPLICATE") != -1):
score = 0
else:
score *= 0.5
+5 -1
View File
@@ -1,4 +1,8 @@
1. Proposed trigger events that merely reword the initial prompt should be penalised
2. Proposed trigger events should be specific enough to create reasonable analysis
2. Proposed trigger events should be specific enough to create reasonable analysis of that specific disinformation spread
3. Proposed trigger events should be time-specific. A trigger event that happened after the event is invalid
4. Proposed trigger events should not describe a story of propagation (discussion of debunk articles is invalid)
5. Proposed trigger events should be sufficiently different from one another
6. Proposed trigger events must be free from bias, and backed up by reliable evidence
7. Proposed trigger events should be verifiable (general ideas of sentiment are not admissible)
8. Proposed trigger events should have a genuine causal link between the event and the disinformation