Redo rules a little bit. Update fetch to retrieve only from some sources. Add statistics to display, fix rules display.

This commit is contained in:
William Jeynes
2026-02-23 21:56:27 +00:00
parent cca3c42f5b
commit 3d0cacd24e
3 changed files with 161 additions and 15 deletions
+13 -9
View File
@@ -9,16 +9,20 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents"
# Ukraine: http://weverify.eu/resource/Concept/Q212
# COVID: http://weverify.eu/resource/Concept/Q84263196
DEFAULT_PARAMS = {
"concept": "http://weverify.eu/resource/Concept/Q212",
# "documentTypes": "http://schema.org/Claim",
"from": "2000-01-01",
"to": "2026-02-19",
"lang": "en",
"limit": 5000, # Max per page
"page": 1,
"orderBy": "date"
}
# Default query parameters, kept as an ordered list of (key, value) pairs
# instead of a dict so that the same key ("organization") can be listed more
# than once; a dict literal would silently keep only the last value per key.
# NOTE(review): presumably sent as the query string for BASE_URL — confirm at
# the call site that the HTTP client accepts a list of pairs.
DEFAULT_PARAMS = [
("concept", "http://weverify.eu/resource/Concept/Q212"),
("from", "2000-01-01"),
("to", "2026-02-19"),
("lang", "en"),
("limit", 5000),
("page", 1),
("orderBy", "date"),
# Repeated "organization" entries: one pair per source to include.
("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
("organization", "http://weverify.eu/resource/Organization/c71953fa6cf24ac4178f751c77862070"), # CheckYourFact
]
NUM_RANDOM_CLAIMS = 20
+135 -2
View File
@@ -3,6 +3,8 @@ import streamlit as st
import json
import random
from pathlib import Path
from collections import Counter, defaultdict
import pandas as pd
# Path to your JSONL file
INPUT_FILE = "../../data/results.jsonl"
@@ -127,7 +129,7 @@ st.title("Claim Visualizer")
# --------------------------
view = st.sidebar.selectbox(
"Choose View",
["All Claims", "Single Claim Random", "View Rules"]
["All Claims", "Single Claim Random", "View Rules", "Statistics"]
)
# --------------------------
@@ -334,4 +336,135 @@ elif view == "Single Claim Random":
elif view == "View Rules":
with open("rules.txt", "r", encoding="utf-8") as f:
st.write(f.readlines())
st.write(f.read())
elif view == "Statistics":
st.header("Statistics")
word_counter = Counter()
doc_scores = defaultdict(list)
diff_scores = defaultdict(list)
# ---- collect stats ----
for entry in st.session_state.data:
doc_url = entry.get("documentUrl")
for o in entry.get("output", []):
for c in o.get("content_parsed", []):
# ---- extra_info word counts ----
extra = c.get("extra_info", "")
if extra:
words = extra.strip().split()
word_counter.update(words)
# ---- human score aggregation ----
hs = c.get("human_score")
if hs is not None and doc_url:
doc_scores[doc_url].append(hs)
# ---- diff score aggregation ----
s = c.get("score")
if hs is not None and s is not None and doc_url:
diff = abs(hs - s)
diff_scores[doc_url].append(diff)
# ==========================
# Extra Info Word Counts
# ==========================
st.subheader("Extra Info Label Counts")
if word_counter:
df_words = (
pd.DataFrame(word_counter.items(), columns=["Label", "Count"])
.sort_values("Count", ascending=False)
)
st.dataframe(df_words)
st.bar_chart(df_words.set_index("Label"))
else:
st.info("No extra_info data available yet.")
# ==========================
# Avg Human Score per Document
# ==========================
st.subheader("Average Human Score per documentUrl")
avg_scores = []
for doc, scores in doc_scores.items():
if scores:
avg_scores.append({
"documentUrl": doc,
"avg_human_score": sum(scores) / len(scores),
"num_events": len(scores)
})
if avg_scores:
df_scores = pd.DataFrame(avg_scores).sort_values(
"avg_human_score",
ascending=False
)
st.dataframe(df_scores)
# ==========================
# Distribution (rounded to 0.1)
# ==========================
st.subheader("Distribution of Average Human Scores (Rounded to 0.1)")
# round averages to nearest 0.1
df_scores["rounded_score"] = (
df_scores["avg_human_score"].round(1)
)
# count how many docs fall into each bucket
dist = (
df_scores["rounded_score"]
.value_counts()
.sort_index()
.reset_index()
)
dist.columns = ["rounded_score", "count"]
# ensure all bins from 0.0 → 1.0 exist
all_bins = pd.DataFrame({
"rounded_score": [round(x * 0.1, 1) for x in range(11)]
})
dist = (
all_bins.merge(dist, on="rounded_score", how="left")
.fillna(0)
)
dist["count"] = dist["count"].astype(int)
# plot counts per score bucket
st.bar_chart(
dist.set_index("rounded_score")["count"]
)
else:
st.info("No human scores available yet.")
# ==========================
# Overall Model vs Human Difference
# ==========================
st.subheader("Model vs Human Agreement")
all_diffs = [
diff
for diffs in diff_scores.values()
for diff in diffs
]
if all_diffs:
avg_diff = sum(all_diffs) / len(all_diffs)
st.write(
f"Average absolute difference between model score and human score: "
f"**{avg_diff:.3f}**"
)
else:
st.info("No items have both score and human_score yet.")
+11 -2
View File
@@ -1,8 +1,17 @@
1. Proposed trigger events rewording the initial prompt should be penalised
2. Proposed trigger events should be specific enough to create a reasonable analysis of that specific disinformation spread
2.1. Proposed trigger events should be verifiable (general ideas of sentiment are not admissible)
2.2. Proposed trigger events should have a genuine causal link between the event and the disinformation
2.3. Proposed trigger events should avoid speculation
3. Proposed trigger events should be time-specific. A trigger event that happened after the event is invalid
4. Proposed trigger events should not describe a story of propagation (discussions of debunk articles are invalid)
5. Proposed trigger events should be sufficiently different from one another
6. Proposed trigger events must be free from bias, and backed up by reliable evidence
7. Proposed trigger events should be verifiable (general ideas of sentiment are not admissible)
8. Proposed trigger events should have a genuine causal link between the event and the disinformation