Fix longstanding bug in wrapper. Add handling to allow duplicate events to be handled. Remove analysis script (will replace with more in-depth work in main frontend)
This commit is contained in:
@@ -32,21 +32,15 @@ run_wrapper () {
|
||||
npm run dev
|
||||
}
|
||||
|
||||
run_analysis () {
|
||||
cd supporting/scorer
|
||||
python analyse.py
|
||||
}
|
||||
|
||||
case "$1" in
|
||||
agent) run_agent ;;
|
||||
ragas_service) run_ragas_service ;;
|
||||
frontend) run_frontend ;;
|
||||
fetch) run_fetch ;;
|
||||
wrapper) run_wrapper ;;
|
||||
analysis) run_analysis ;;
|
||||
*)
|
||||
echo "Unknown command: $1"
|
||||
echo "Usage: ./runproject [agent|ragas_service|frontend|fetch|wrapper|analysis]"
|
||||
echo "Usage: ./runproject [agent|ragas_service|frontend|fetch|wrapper]"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
+21
-13
@@ -22,7 +22,7 @@ type Claim = {
|
||||
type ResultRecord = {
|
||||
documentUrl: string;
|
||||
text: string;
|
||||
status: "success" | "error";
|
||||
status: "success" | "error" | "wrapper_crash";
|
||||
normalized?: string,
|
||||
output?: any;
|
||||
error?: string;
|
||||
@@ -52,27 +52,35 @@ async function processClaim(claim: Claim): Promise<ResultRecord> {
|
||||
);
|
||||
|
||||
|
||||
let lastMessage: any = null;
|
||||
let lastContent: any = null;
|
||||
|
||||
for await (const chunk of stream) {
|
||||
// capture latest output
|
||||
if (chunk?.data) {
|
||||
lastMessage = chunk.data;
|
||||
}
|
||||
lastContent = chunk
|
||||
}
|
||||
|
||||
return {
|
||||
documentUrl: claim.documentUrl,
|
||||
text: claim.text,
|
||||
status: "success",
|
||||
output: lastMessage.messages?.at(-1) ?? "",
|
||||
normalized: lastMessage.normalizedClaim
|
||||
};
|
||||
if (lastContent?.event != "error") {
|
||||
return {
|
||||
documentUrl: claim.documentUrl,
|
||||
text: claim.text,
|
||||
status: "success",
|
||||
output: lastContent.data.messages?.at(-1) ?? "",
|
||||
normalized: lastContent.data.normalizedClaim
|
||||
};
|
||||
}
|
||||
else {
|
||||
return {
|
||||
documentUrl: claim.documentUrl,
|
||||
text: claim.text,
|
||||
status: "error",
|
||||
dump: lastContent
|
||||
};
|
||||
}
|
||||
} catch (err: any) {
|
||||
return {
|
||||
documentUrl: claim.documentUrl,
|
||||
text: claim.text,
|
||||
status: "error",
|
||||
status: "wrapper_crash",
|
||||
error: err?.message ?? String(err)
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
import json
|
||||
from statistics import mean
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Load JSONL file
|
||||
# ------------------------------------------------------------
|
||||
DATA_FILE = "../../data/results.jsonl"
|
||||
|
||||
data = []
|
||||
with open(DATA_FILE, "r", encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
data.append(json.loads(line))
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Extract events
|
||||
# ------------------------------------------------------------
|
||||
all_events = []
|
||||
claims = []
|
||||
|
||||
for item in data:
|
||||
if item.get("status") != "success":
|
||||
continue
|
||||
|
||||
claim_text = item.get("text", "")
|
||||
outputs = item.get("output", [])
|
||||
|
||||
for out in outputs:
|
||||
if "content_parsed" in out:
|
||||
events = out["content_parsed"]
|
||||
|
||||
claims.append({
|
||||
"claim": claim_text,
|
||||
"events": events
|
||||
})
|
||||
|
||||
for ev in events:
|
||||
score = ev["score"]
|
||||
human = ev["human_score"]
|
||||
|
||||
all_events.append({
|
||||
"claim": claim_text,
|
||||
"event": ev["event"],
|
||||
"reason": ev["reasoningWhyRelevant"],
|
||||
"score": score,
|
||||
"human_score": human,
|
||||
"gap": abs(score - human),
|
||||
})
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Compute metrics
|
||||
# ------------------------------------------------------------
|
||||
if not all_events:
|
||||
raise ValueError("No events found in file.")
|
||||
|
||||
avg_score = mean(e["score"] for e in all_events)
|
||||
avg_diff = mean(e["gap"] for e in all_events)
|
||||
|
||||
largest_gap_event = max(all_events, key=lambda x: x["gap"])
|
||||
worst_event = largest_gap_event
|
||||
|
||||
worst_claim_data = next(
|
||||
c for c in claims if c["claim"] == worst_event["claim"]
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Output results
|
||||
# ------------------------------------------------------------
|
||||
print(f"Average score: {avg_score:.4f}")
|
||||
print(f"Average |human_score - score|: {avg_diff:.4f}")
|
||||
|
||||
print("\nLargest gap event:")
|
||||
print(f"Event: {largest_gap_event['event']}")
|
||||
print(f"Score: {largest_gap_event['score']}")
|
||||
print(f"Human score: {largest_gap_event['human_score']}")
|
||||
print(f"Gap: {largest_gap_event['gap']:.4f}")
|
||||
|
||||
print("\nWorst performing event and its claims:")
|
||||
print(f"Claim: {worst_event['claim']}")
|
||||
print(f"Worst Event: {worst_event['event']}")
|
||||
@@ -50,14 +50,45 @@ def load_data(file_path):
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def save_data_clean(file_path, data):
|
||||
merged = {}
|
||||
|
||||
for entry in data:
|
||||
# collect all content_parsed items from this entry
|
||||
events = []
|
||||
for o in entry.get("output", []):
|
||||
if "content_parsed" in o:
|
||||
events.extend(o["content_parsed"])
|
||||
|
||||
doc_url = entry.get("documentUrl")
|
||||
if not doc_url:
|
||||
continue
|
||||
|
||||
if doc_url not in merged:
|
||||
# take the first object's other values
|
||||
new_entry = entry.copy()
|
||||
new_entry["events"] = events
|
||||
|
||||
# remove unwanted fields safely
|
||||
new_entry.pop("output", None)
|
||||
new_entry.pop("status", None)
|
||||
|
||||
merged[doc_url] = new_entry
|
||||
else:
|
||||
# merge events into existing entry
|
||||
merged[doc_url]["events"].extend(events)
|
||||
|
||||
# sort events by human_score
|
||||
for entry in merged.values():
|
||||
entry["events"].sort(
|
||||
key=lambda e: e.get("human_score", 0),
|
||||
reverse=True # highest score first; remove if you want ascending
|
||||
)
|
||||
|
||||
# write merged results
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
for entry in data:
|
||||
for o in entry.get("output", []):
|
||||
if "content_parsed" in o:
|
||||
entry["events"] = o["content_parsed"]
|
||||
del entry["output"]
|
||||
del entry["status"]
|
||||
for entry in merged.values():
|
||||
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
||||
|
||||
def save_data(file_path, data):
|
||||
@@ -135,10 +166,22 @@ elif view == "Single Claim Random":
|
||||
unscored.append(c)
|
||||
|
||||
if unscored:
|
||||
unscored_entries.append({
|
||||
"entry": entry,
|
||||
"claims": unscored
|
||||
})
|
||||
# try to find an existing entry with same documentUrl
|
||||
existing = next(
|
||||
(item for item in unscored_entries
|
||||
if item["entry"]["documentUrl"] == entry["documentUrl"]),
|
||||
None
|
||||
)
|
||||
|
||||
if existing:
|
||||
# append new claims to existing entry
|
||||
existing["claims"].extend(unscored)
|
||||
else:
|
||||
# create new object
|
||||
unscored_entries.append({
|
||||
"entry": entry,
|
||||
"claims": list(unscored)
|
||||
})
|
||||
|
||||
if unscored_entries:
|
||||
st.session_state.current_claim = random.choice(unscored_entries)
|
||||
@@ -190,7 +233,7 @@ elif view == "Single Claim Random":
|
||||
f"**Reasoning:** {c.get('reasoningWhyRelevant')}"
|
||||
)
|
||||
|
||||
cols = st.columns(5)
|
||||
cols = st.columns(7)
|
||||
|
||||
temp = ""
|
||||
|
||||
@@ -211,6 +254,14 @@ elif view == "Single Claim Random":
|
||||
temp += "STORY " if a else ""
|
||||
|
||||
with cols[4]:
|
||||
a = st.checkbox("Duplicate?", key = "D" + str(idx) + c.get('event') )
|
||||
temp += "DUPLICATE " if a else ""
|
||||
|
||||
with cols[5]:
|
||||
a = st.checkbox("Bias Shown", key = "B" + str(idx) + c.get('event') )
|
||||
temp += "BIAS " if a else ""
|
||||
|
||||
with cols[6]:
|
||||
a = st.checkbox("Perfect", key = "P" + str(idx) + c.get('event') )
|
||||
temp += "PERFECT " if a else ""
|
||||
|
||||
@@ -262,6 +313,8 @@ elif view == "Single Claim Random":
|
||||
if (claim_obj["extra_info"] != ""):
|
||||
if (claim_obj["extra_info"].find("PERFECT") != -1):
|
||||
score = 1
|
||||
elif(claim_obj["extra_info"].find("DUPLICATE") != -1):
|
||||
score = 0
|
||||
else:
|
||||
score *= 0.5
|
||||
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
1. Proposed trigger events rewording the initial prompt should be penalised
|
||||
2. Proposed trigger events should be specific enough to create reasonable analysis
|
||||
2. Proposed trigger events should be specific enough to create reasonable analysis of that specific disinformation spread
|
||||
3. Proposed trigger events should be time-specific. A trigger event that happened after the event is invalid
|
||||
4. Proposed trigger events should not describe a story of propagation (discussion of debunk articles is invalid)
|
||||
5. Proposed trigger events should be sufficiently different from one another
|
||||
6. Proposed trigger events must be free from bias, and backed up by reliable evidence
|
||||
7. Proposed trigger events should be verifiable (general ideas of sentiment are not admissible)
|
||||
8. Proposed trigger events should have a genuine causal link between the event and the disinformation
|
||||
Reference in New Issue
Block a user