Start writing cleaned jsonl output. Re-add sentence to trigger prompt. Fix recursion limit
This commit is contained in:
@@ -22,5 +22,6 @@ Use your abilities to look between the lines and produce some insightful analysi
|
|||||||
Events will be reordered as part of processing; each statement must stand alone
|
Events will be reordered as part of processing; each statement must stand alone
|
||||||
|
|
||||||
The preceding messages act as examples of previous responses to potentially fictional events and scores given.
|
The preceding messages act as examples of previous responses to potentially fictional events and scores given.
|
||||||
|
Analysis should only be completed for proposed events that would garner >0.7 points
|
||||||
|
|
||||||
Let's go through it step by step
|
Let's go through it step by step
|
||||||
@@ -13,7 +13,7 @@ const CSV_PATHS = [
|
|||||||
|
|
||||||
const CACHE_PATH = "../data/csv.cache.json";
|
const CACHE_PATH = "../data/csv.cache.json";
|
||||||
|
|
||||||
const JSONL_PATH = "../data/results.jsonl"
|
const JSONL_PATH = "../data/input.jsonl"
|
||||||
|
|
||||||
type EmbeddingCache = {
|
type EmbeddingCache = {
|
||||||
rawtexts: string[];
|
rawtexts: string[];
|
||||||
|
|||||||
@@ -23,8 +23,10 @@ type ResultRecord = {
|
|||||||
documentUrl: string;
|
documentUrl: string;
|
||||||
text: string;
|
text: string;
|
||||||
status: "success" | "error";
|
status: "success" | "error";
|
||||||
|
normalized?: string,
|
||||||
output?: any;
|
output?: any;
|
||||||
error?: string;
|
error?: string;
|
||||||
|
dump?: any;
|
||||||
};
|
};
|
||||||
|
|
||||||
function appendResult(record: ResultRecord) {
|
function appendResult(record: ResultRecord) {
|
||||||
@@ -42,11 +44,14 @@ async function processClaim(claim: Claim): Promise<ResultRecord> {
|
|||||||
input: {
|
input: {
|
||||||
disinformationTitle: claim.text,
|
disinformationTitle: claim.text,
|
||||||
},
|
},
|
||||||
streamMode: "messages-tuple",
|
streamMode: "values",
|
||||||
recursionLimit: 100,
|
config: {
|
||||||
|
recursion_limit: 50
|
||||||
|
}
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
let lastMessage: any = null;
|
let lastMessage: any = null;
|
||||||
|
|
||||||
for await (const chunk of stream) {
|
for await (const chunk of stream) {
|
||||||
@@ -60,14 +65,15 @@ async function processClaim(claim: Claim): Promise<ResultRecord> {
|
|||||||
documentUrl: claim.documentUrl,
|
documentUrl: claim.documentUrl,
|
||||||
text: claim.text,
|
text: claim.text,
|
||||||
status: "success",
|
status: "success",
|
||||||
output: lastMessage,
|
output: lastMessage.messages?.at(-1) ?? "",
|
||||||
|
normalized: lastMessage.normalizedClaim
|
||||||
};
|
};
|
||||||
} catch (err: any) {
|
} catch (err: any) {
|
||||||
return {
|
return {
|
||||||
documentUrl: claim.documentUrl,
|
documentUrl: claim.documentUrl,
|
||||||
text: claim.text,
|
text: claim.text,
|
||||||
status: "error",
|
status: "error",
|
||||||
error: err?.message ?? String(err),
|
error: err?.message ?? String(err)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,8 @@ import random
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Path to your JSONL file
|
# Path to your JSONL file
|
||||||
DATA_FILE = "../../data/results.jsonl"
|
INPUT_FILE = "../../data/results.jsonl"
|
||||||
|
OUTPUT_FILE = "../../data/ranked.jsonl"
|
||||||
|
|
||||||
# --------------------------
|
# --------------------------
|
||||||
# Helper functions
|
# Helper functions
|
||||||
@@ -13,21 +14,51 @@ DATA_FILE = "../../data/results.jsonl"
|
|||||||
def load_data(file_path):
|
def load_data(file_path):
|
||||||
"""Load JSONL file into a list of dicts with parsed content."""
|
"""Load JSONL file into a list of dicts with parsed content."""
|
||||||
data = []
|
data = []
|
||||||
|
|
||||||
if Path(file_path).exists():
|
if Path(file_path).exists():
|
||||||
with open(file_path, "r", encoding="utf-8") as f:
|
with open(file_path, "r", encoding="utf-8") as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
if line.strip():
|
if not line.strip():
|
||||||
entry = json.loads(line)
|
continue
|
||||||
for o in entry.get("output", []):
|
|
||||||
if "content" in o:
|
entry = json.loads(line)
|
||||||
try:
|
|
||||||
o["content_parsed"] = json.loads(o["content"])
|
outputs = entry.get("output", [])
|
||||||
except json.JSONDecodeError:
|
|
||||||
o["content_parsed"] = []
|
# ---- normalize format ----
|
||||||
print("parse error")
|
# old format: list
|
||||||
data.append(entry)
|
# new format: single dict
|
||||||
|
if isinstance(outputs, dict):
|
||||||
|
outputs = [outputs]
|
||||||
|
|
||||||
|
# ---- parse content ----
|
||||||
|
for o in outputs:
|
||||||
|
content = o.get("content")
|
||||||
|
|
||||||
|
if content:
|
||||||
|
try:
|
||||||
|
o["content_parsed"] = json.loads(content)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
o["content_parsed"] = []
|
||||||
|
print("parse error")
|
||||||
|
|
||||||
|
# optionally store normalized outputs back
|
||||||
|
entry["output"] = outputs
|
||||||
|
|
||||||
|
data.append(entry)
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
def save_data_clean(file_path, data):
|
||||||
|
with open(file_path, "w", encoding="utf-8") as f:
|
||||||
|
for entry in data:
|
||||||
|
for o in entry.get("output", []):
|
||||||
|
if "content_parsed" in o:
|
||||||
|
entry["events"] = o["content_parsed"]
|
||||||
|
del entry["output"]
|
||||||
|
del entry["status"]
|
||||||
|
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
||||||
|
|
||||||
def save_data(file_path, data):
|
def save_data(file_path, data):
|
||||||
with open(file_path, "w", encoding="utf-8") as f:
|
with open(file_path, "w", encoding="utf-8") as f:
|
||||||
for entry in data:
|
for entry in data:
|
||||||
@@ -39,11 +70,12 @@ def save_data(file_path, data):
|
|||||||
)
|
)
|
||||||
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
||||||
|
|
||||||
|
|
||||||
# --------------------------
|
# --------------------------
|
||||||
# Session State Init
|
# Session State Init
|
||||||
# --------------------------
|
# --------------------------
|
||||||
if "data" not in st.session_state:
|
if "data" not in st.session_state:
|
||||||
st.session_state.data = load_data(DATA_FILE)
|
st.session_state.data = load_data(INPUT_FILE)
|
||||||
|
|
||||||
if "current_claim" not in st.session_state:
|
if "current_claim" not in st.session_state:
|
||||||
st.session_state.current_claim = None
|
st.session_state.current_claim = None
|
||||||
@@ -235,7 +267,9 @@ elif view == "Single Claim Random":
|
|||||||
|
|
||||||
claim_obj["human_score"] = round(score, 3)
|
claim_obj["human_score"] = round(score, 3)
|
||||||
|
|
||||||
save_data(DATA_FILE, st.session_state.data)
|
save_data(INPUT_FILE, st.session_state.data)
|
||||||
|
save_data_clean(OUTPUT_FILE, st.session_state.data)
|
||||||
|
|
||||||
|
|
||||||
print("Ranking converted to scores and saved!")
|
print("Ranking converted to scores and saved!")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user