From 78a49e2843425c80dc6a3b99b5807dcd5985e52f Mon Sep 17 00:00:00 2001 From: William Jeynes Date: Thu, 19 Feb 2026 11:36:31 +0000 Subject: [PATCH] Start writing cleaned jsonl output. Re-add sentence to trigger prompt. Fix recursion limit --- agent/prompts/trigger.txt | 1 + agent/tools/retreiveExamples.ts | 2 +- supporting/Wrapper/run.ts | 14 +++++--- supporting/scorer/display.py | 60 ++++++++++++++++++++++++++------- 4 files changed, 59 insertions(+), 18 deletions(-) diff --git a/agent/prompts/trigger.txt b/agent/prompts/trigger.txt index 704e4f6..52e77df 100644 --- a/agent/prompts/trigger.txt +++ b/agent/prompts/trigger.txt @@ -22,5 +22,6 @@ Use your abilities to look between the lines and produce some insightful analysi Events will be reordered as part of processing, each statement must stand alone The preceeding messages act as examples of previous responses to potentially ficitonal events and scores given. +Analysis should only be completed for proposed events that would graner >0.7 points Lets go through it step by step \ No newline at end of file diff --git a/agent/tools/retreiveExamples.ts b/agent/tools/retreiveExamples.ts index a63317b..a7672e4 100644 --- a/agent/tools/retreiveExamples.ts +++ b/agent/tools/retreiveExamples.ts @@ -13,7 +13,7 @@ const CSV_PATHS = [ const CACHE_PATH = "../data/csv.cache.json"; -const JSONL_PATH = "../data/results.jsonl" +const JSONL_PATH = "../data/input.jsonl" type EmbeddingCache = { rawtexts: string[]; diff --git a/supporting/Wrapper/run.ts b/supporting/Wrapper/run.ts index 97b93d7..8e123ba 100644 --- a/supporting/Wrapper/run.ts +++ b/supporting/Wrapper/run.ts @@ -23,8 +23,10 @@ type ResultRecord = { documentUrl: string; text: string; status: "success" | "error"; + normalized?: string, output?: any; error?: string; + dump?: any; }; function appendResult(record: ResultRecord) { @@ -42,10 +44,13 @@ async function processClaim(claim: Claim): Promise { input: { disinformationTitle: claim.text, }, - streamMode: "messages-tuple", - recursionLimit: 100, + streamMode: "values", + config: { + recursion_limit: 50 + } } ); + let lastMessage: any = null; @@ -60,14 +65,15 @@ async function processClaim(claim: Claim): Promise { documentUrl: claim.documentUrl, text: claim.text, status: "success", - output: lastMessage, + output: lastMessage.messages?.at(-1) ?? "", + normalized: lastMessage.normalizedClaim }; } catch (err: any) { return { documentUrl: claim.documentUrl, text: claim.text, status: "error", - error: err?.message ?? String(err), + error: err?.message ?? String(err) }; } } diff --git a/supporting/scorer/display.py b/supporting/scorer/display.py index 489d681..7e81db7 100644 --- a/supporting/scorer/display.py +++ b/supporting/scorer/display.py @@ -4,7 +4,8 @@ import random from pathlib import Path # Path to your JSONL file -DATA_FILE = "../../data/results.jsonl" +INPUT_FILE = "../../data/results.jsonl" +OUTPUT_FILE = "../../data/ranked.jsonl" # -------------------------- # Helper functions @@ -13,21 +14,51 @@ DATA_FILE = "../../data/results.jsonl" def load_data(file_path): """Load JSONL file into a list of dicts with parsed content.""" data = [] + if Path(file_path).exists(): with open(file_path, "r", encoding="utf-8") as f: for line in f: - if line.strip(): - entry = json.loads(line) - for o in entry.get("output", []): - if "content" in o: - try: - o["content_parsed"] = json.loads(o["content"]) - except json.JSONDecodeError: - o["content_parsed"] = [] - print("parse error") - data.append(entry) + if not line.strip(): + continue + + entry = json.loads(line) + + outputs = entry.get("output", []) + + # ---- normalize format ---- + # old format: list + # new format: single dict + if isinstance(outputs, dict): + outputs = [outputs] + + # ---- parse content ---- + for o in outputs: + content = o.get("content") + + if content: + try: + o["content_parsed"] = json.loads(content) + except json.JSONDecodeError: + o["content_parsed"] = [] + print("parse error") + + # optionally store normalized outputs back + entry["output"] = outputs + + data.append(entry) + return data +def save_data_clean(file_path, data): + with open(file_path, "w", encoding="utf-8") as f: + for entry in data: + for o in entry.get("output", []): + if "content_parsed" in o: + entry["events"] = o["content_parsed"] + del entry["output"] + del entry["status"] + f.write(json.dumps(entry, ensure_ascii=False) + "\n") + def save_data(file_path, data): with open(file_path, "w", encoding="utf-8") as f: for entry in data: @@ -39,11 +70,12 @@ def save_data(file_path, data): ) f.write(json.dumps(entry, ensure_ascii=False) + "\n") + # -------------------------- # Session State Init # -------------------------- if "data" not in st.session_state: - st.session_state.data = load_data(DATA_FILE) + st.session_state.data = load_data(INPUT_FILE) if "current_claim" not in st.session_state: st.session_state.current_claim = None @@ -235,7 +267,9 @@ elif view == "Single Claim Random": claim_obj["human_score"] = round(score, 3) - save_data(DATA_FILE, st.session_state.data) + save_data(INPUT_FILE, st.session_state.data) + save_data_clean(OUTPUT_FILE, st.session_state.data) + print("Ranking converted to scores and saved!")