diff --git a/supporting/dbkf/fetch.py b/supporting/dbkf/fetch.py index 994d3cd..531622c 100644 --- a/supporting/dbkf/fetch.py +++ b/supporting/dbkf/fetch.py @@ -9,16 +9,20 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents" # Ukraine: http://weverify.eu/resource/Concept/Q212 # COVID: http://weverify.eu/resource/Concept/Q84263196 -DEFAULT_PARAMS = { - "concept": "http://weverify.eu/resource/Concept/Q212", - # "documentTypes": "http://schema.org/Claim", - "from": "2000-01-01", - "to": "2026-02-19", - "lang": "en", - "limit": 5000, # Max per page - "page": 1, - "orderBy": "date" -} +# "documentTypes": "http://schema.org/Claim", +DEFAULT_PARAMS = [ + ("concept", "http://weverify.eu/resource/Concept/Q212"), + ("from", "2000-01-01"), + ("to", "2026-02-19"), + ("lang", "en"), + ("limit", 5000), + ("page", 1), + ("orderBy", "date"), + + # duplicate keys allowed + ("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake + ("organization", "http://weverify.eu/resource/Organization/c71953fa6cf24ac4178f751c77862070"), # CheckYourFact +] NUM_RANDOM_CLAIMS = 20 diff --git a/supporting/scorer/display.py b/supporting/scorer/display.py index dd4f4be..e53550c 100644 --- a/supporting/scorer/display.py +++ b/supporting/scorer/display.py @@ -3,6 +3,8 @@ import streamlit as st import json import random from pathlib import Path +from collections import Counter, defaultdict +import pandas as pd # Path to your JSONL file INPUT_FILE = "../../data/results.jsonl" @@ -127,7 +129,7 @@ st.title("Claim Visualizer") # -------------------------- view = st.sidebar.selectbox( "Choose View", - ["All Claims", "Single Claim Random", "View Rules"] + ["All Claims", "Single Claim Random", "View Rules", "Statistics"] ) # -------------------------- @@ -334,4 +336,135 @@ elif view == "Single Claim Random": elif view == "View Rules": with open("rules.txt", "r", encoding="utf-8") as f: - st.write(f.readlines()) + st.write(f.read()) + +elif view == 
"Statistics": + + st.header("Statistics") + + word_counter = Counter() + doc_scores = defaultdict(list) + diff_scores = defaultdict(list) + + # ---- collect stats ---- + for entry in st.session_state.data: + doc_url = entry.get("documentUrl") + + for o in entry.get("output", []): + for c in o.get("content_parsed", []): + + # ---- extra_info word counts ---- + extra = c.get("extra_info", "") + if extra: + words = extra.strip().split() + word_counter.update(words) + + # ---- human score aggregation ---- + hs = c.get("human_score") + if hs is not None and doc_url: + doc_scores[doc_url].append(hs) + + # ---- diff score aggregation ---- + s = c.get("score") + if hs is not None and s is not None and doc_url: + diff = abs(hs - s) + diff_scores[doc_url].append(diff) + + # ========================== + # Extra Info Word Counts + # ========================== + st.subheader("Extra Info Label Counts") + + if word_counter: + df_words = ( + pd.DataFrame(word_counter.items(), columns=["Label", "Count"]) + .sort_values("Count", ascending=False) + ) + + st.dataframe(df_words) + st.bar_chart(df_words.set_index("Label")) + else: + st.info("No extra_info data available yet.") + + # ========================== + # Avg Human Score per Document + # ========================== + st.subheader("Average Human Score per documentUrl") + + avg_scores = [] + + for doc, scores in doc_scores.items(): + if scores: + avg_scores.append({ + "documentUrl": doc, + "avg_human_score": sum(scores) / len(scores), + "num_events": len(scores) + }) + + if avg_scores: + df_scores = pd.DataFrame(avg_scores).sort_values( + "avg_human_score", + ascending=False + ) + + st.dataframe(df_scores) + # ========================== + # Distribution (rounded to 0.1) + # ========================== + + st.subheader("Distribution of Average Human Scores (Rounded to 0.1)") + + # round averages to nearest 0.1 + df_scores["rounded_score"] = ( + df_scores["avg_human_score"].round(1) + ) + + # count how many docs fall into each bucket 
+ dist = ( + df_scores["rounded_score"] + .value_counts() + .sort_index() + .reset_index() + ) + + dist.columns = ["rounded_score", "count"] + + # ensure all bins from 0.0 → 1.0 exist + all_bins = pd.DataFrame({ + "rounded_score": [round(x * 0.1, 1) for x in range(11)] + }) + + dist = ( + all_bins.merge(dist, on="rounded_score", how="left") + .fillna(0) + ) + + dist["count"] = dist["count"].astype(int) + + # plot counts per score bucket + st.bar_chart( + dist.set_index("rounded_score")["count"] + ) + else: + st.info("No human scores available yet.") + + # ========================== + # Overall Model vs Human Difference + # ========================== + st.subheader("Model vs Human Agreement") + + all_diffs = [ + diff + for diffs in diff_scores.values() + for diff in diffs + ] + + if all_diffs: + avg_diff = sum(all_diffs) / len(all_diffs) + + st.write( + f"Average absolute difference between model score and human score: " + f"**{avg_diff:.3f}**" + ) + else: + st.info("No items have both score and human_score yet.") \ No newline at end of file diff --git a/supporting/scorer/rules.txt b/supporting/scorer/rules.txt index 2a73f80..3f3aca8 100644 --- a/supporting/scorer/rules.txt +++ b/supporting/scorer/rules.txt @@ -1,8 +1,17 @@ 1. Proposed trigger events rewording the initial prompt should penalised + 2. Proposed trigger events should be specific enough to create resonable analysis of that specific disinformation spread + + 2.1. Proposed trigger events should be verifiable (general ideas of sentiment are not admissible) + + 2.2. Proposed trigger events should have a genuine causal link between the event and the disinformation + + 2.3. Proposed trigger events should avoid speculation + 3. Proposed trigger events should be time-specific. A trigger events that happened after the event is invalid + 4. Proposed trigger events should not describe a story of propogation (discussions on debunk articles is invalid) + 5. 
Proposed trigger events should be sufficiently different from one another -6. Proposed trigger events must be free from bias, and backed up by reliable evidence -7. Proposed trigger events should be verifiable (general ideas of sentiment are not admissable) -8. Proposed trigger events events should have a genuine causal link between the event and the disinformation \ No newline at end of file + +6. Proposed trigger events must be free from bias, and backed up by reliable evidence \ No newline at end of file