Start writing cleaned jsonl output. Re-add sentence to trigger prompt. Fix recursion limit

This commit is contained in:
William Jeynes
2026-02-19 11:36:31 +00:00
parent 6f20ade780
commit 78a49e2843
4 changed files with 59 additions and 18 deletions
+1
View File
@@ -22,5 +22,6 @@ Use your abilities to look between the lines and produce some insightful analysi
Events will be reordered as part of processing, each statement must stand alone
The preceding messages act as examples of previous responses to potentially fictional events and the scores given.
Analysis should only be completed for proposed events that would garner >0.7 points
Let's go through it step by step
+1 -1
View File
@@ -13,7 +13,7 @@ const CSV_PATHS = [
const CACHE_PATH = "../data/csv.cache.json";
const JSONL_PATH = "../data/results.jsonl"
const JSONL_PATH = "../data/input.jsonl"
type EmbeddingCache = {
rawtexts: string[];
+10 -4
View File
@@ -23,8 +23,10 @@ type ResultRecord = {
documentUrl: string;
text: string;
status: "success" | "error";
normalized?: string,
output?: any;
error?: string;
dump?: any;
};
function appendResult(record: ResultRecord) {
@@ -42,10 +44,13 @@ async function processClaim(claim: Claim): Promise<ResultRecord> {
input: {
disinformationTitle: claim.text,
},
streamMode: "messages-tuple",
recursionLimit: 100,
streamMode: "values",
config: {
recursion_limit: 50
}
}
);
let lastMessage: any = null;
@@ -60,14 +65,15 @@ async function processClaim(claim: Claim): Promise<ResultRecord> {
documentUrl: claim.documentUrl,
text: claim.text,
status: "success",
output: lastMessage,
output: lastMessage.messages?.at(-1) ?? "",
normalized: lastMessage.normalizedClaim
};
} catch (err: any) {
return {
documentUrl: claim.documentUrl,
text: claim.text,
status: "error",
error: err?.message ?? String(err),
error: err?.message ?? String(err)
};
}
}
+47 -13
View File
@@ -4,7 +4,8 @@ import random
from pathlib import Path
# Path to your JSONL file
DATA_FILE = "../../data/results.jsonl"
INPUT_FILE = "../../data/results.jsonl"
OUTPUT_FILE = "../../data/ranked.jsonl"
# --------------------------
# Helper functions
@@ -13,21 +14,51 @@ DATA_FILE = "../../data/results.jsonl"
def load_data(file_path):
"""Load JSONL file into a list of dicts with parsed content."""
data = []
if Path(file_path).exists():
with open(file_path, "r", encoding="utf-8") as f:
for line in f:
if line.strip():
entry = json.loads(line)
for o in entry.get("output", []):
if "content" in o:
try:
o["content_parsed"] = json.loads(o["content"])
except json.JSONDecodeError:
o["content_parsed"] = []
print("parse error")
data.append(entry)
if not line.strip():
continue
entry = json.loads(line)
outputs = entry.get("output", [])
# ---- normalize format ----
# old format: list
# new format: single dict
if isinstance(outputs, dict):
outputs = [outputs]
# ---- parse content ----
for o in outputs:
content = o.get("content")
if content:
try:
o["content_parsed"] = json.loads(content)
except json.JSONDecodeError:
o["content_parsed"] = []
print("parse error")
# optionally store normalized outputs back
entry["output"] = outputs
data.append(entry)
return data
def save_data_clean(file_path, data):
    """Write a cleaned JSONL export of ``data`` to ``file_path``.

    One JSON object is written per line. Compared with the input entry, each
    written record:

    * has no ``output`` key and no ``status`` key;
    * gains an ``events`` key holding ``content_parsed`` from the output
      items, when at least one item has it (the last such item wins).

    The entries in ``data`` are left untouched. Earlier code deleted the keys
    from the caller's dicts, so a second call on the same list raised
    ``KeyError`` and any later save of the same list lost ``output`` and
    ``status``.

    Args:
        file_path: Destination path; the file is overwritten.
        data: Iterable of dict entries, each optionally carrying ``output``
            (an iterable of dicts) and ``status``.
    """
    with open(file_path, "w", encoding="utf-8") as f:
        for entry in data:
            # Shallow copy so removing keys does not affect the caller's entry.
            cleaned = dict(entry)

            # pop() with a default tolerates entries without "output";
            # "or []" also covers an explicit None value.
            for o in cleaned.pop("output", None) or []:
                if "content_parsed" in o:
                    cleaned["events"] = o["content_parsed"]

            cleaned.pop("status", None)
            f.write(json.dumps(cleaned, ensure_ascii=False) + "\n")
def save_data(file_path, data):
with open(file_path, "w", encoding="utf-8") as f:
for entry in data:
@@ -39,11 +70,12 @@ def save_data(file_path, data):
)
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
# --------------------------
# Session State Init
# --------------------------
if "data" not in st.session_state:
st.session_state.data = load_data(DATA_FILE)
st.session_state.data = load_data(INPUT_FILE)
if "current_claim" not in st.session_state:
st.session_state.current_claim = None
@@ -235,7 +267,9 @@ elif view == "Single Claim Random":
claim_obj["human_score"] = round(score, 3)
save_data(DATA_FILE, st.session_state.data)
save_data(INPUT_FILE, st.session_state.data)
save_data_clean(OUTPUT_FILE, st.session_state.data)
print("Ranking converted to scores and saved!")