Add some preliminary analysis

This commit is contained in:
William Jeynes
2026-02-16 14:42:47 +00:00
parent 6d478fe7ec
commit 90894b2c10
2 changed files with 84 additions and 1 deletions
+3 -1
View File
@@ -8,7 +8,9 @@
"main": "agent.ts", "main": "agent.ts",
"scripts": { "scripts": {
"agent": "npx @langchain/langgraph-cli dev", "agent": "npx @langchain/langgraph-cli dev",
"ragas_service": "cd ../supporting/RAGAS_Service && .venv/bin/uvicorn ragas_service:app --port 8001" "ragas_service": "cd ../supporting/RAGAS_Service && .venv/bin/uvicorn ragas_service:app --port 8001",
"frontend": "cd ../supporting/scorer && .venv/bin/streamlit run display.py",
"fetch": "cd ../supporting/dbkf & python fetch.py"
}, },
"dependencies": { "dependencies": {
"@huggingface/transformers": "^3.8.1", "@huggingface/transformers": "^3.8.1",
+81
View File
@@ -0,0 +1,81 @@
import json
from statistics import mean
# ------------------------------------------------------------
# Load JSONL file
# ------------------------------------------------------------
DATA_FILE = "../Wrapper/results.jsonl"
data = []
with open(DATA_FILE, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if line:
data.append(json.loads(line))
# ------------------------------------------------------------
# Extract events
# ------------------------------------------------------------
all_events = []
claims = []
for item in data:
if item.get("status") != "success":
continue
claim_text = item.get("text", "")
outputs = item.get("output", [])
for out in outputs:
if "content_parsed" in out:
events = out["content_parsed"]
claims.append({
"claim": claim_text,
"events": events
})
for ev in events:
score = ev["score"]
human = ev["human_score"]
all_events.append({
"claim": claim_text,
"event": ev["event"],
"reason": ev["reasoningWhyRelevant"],
"score": score,
"human_score": human,
"gap": abs(score - human),
})
# ------------------------------------------------------------
# Compute metrics
# ------------------------------------------------------------
if not all_events:
raise ValueError("No events found in file.")
avg_score = mean(e["score"] for e in all_events)
avg_diff = mean(e["gap"] for e in all_events)
largest_gap_event = max(all_events, key=lambda x: x["gap"])
worst_event = largest_gap_event
worst_claim_data = next(
c for c in claims if c["claim"] == worst_event["claim"]
)
# ------------------------------------------------------------
# Output results
# ------------------------------------------------------------
print(f"Average score: {avg_score:.4f}")
print(f"Average |human_score - score|: {avg_diff:.4f}")
print("\nLargest gap event:")
print(f"Event: {largest_gap_event['event']}")
print(f"Score: {largest_gap_event['score']}")
print(f"Human score: {largest_gap_event['human_score']}")
print(f"Gap: {largest_gap_event['gap']:.4f}")
print("\nWorst performing event and its claims:")
print(f"Claim: {worst_event['claim']}")
print(f"Worst Event: {worst_event['event']}")