Redo rules a little bit. Update fetch to retrieve only from some sources. Add statistics to display, fix rules display

2026-02-23 21:56:27 +00:00
parent cca3c42f5b
commit 3d0cacd24e
3 changed files with 161 additions and 15 deletions
@@ -9,16 +9,20 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents"
 # Ukraine: http://weverify.eu/resource/Concept/Q212
 # COVID: http://weverify.eu/resource/Concept/Q84263196

-DEFAULT_PARAMS = {
-    "concept": "http://weverify.eu/resource/Concept/Q212",
 # "documentTypes": "http://schema.org/Claim",
-    "from": "2000-01-01",
-    "to": "2026-02-19",
-    "lang": "en",
-    "limit": 5000,  # Max per page
-    "page": 1,
-    "orderBy": "date"
-}
+DEFAULT_PARAMS = [
+    ("concept", "http://weverify.eu/resource/Concept/Q212"),
+    ("from", "2000-01-01"),
+    ("to", "2026-02-19"),
+    ("lang", "en"),
+    ("limit", 5000),
+    ("page", 1),
+    ("orderBy", "date"),
+
+    # A list of (key, value) pairs is used instead of a dict so the same key ("organization") can appear more than once
+    ("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
+    ("organization", "http://weverify.eu/resource/Organization/c71953fa6cf24ac4178f751c77862070"), # CheckYourFact
+]

 NUM_RANDOM_CLAIMS = 20

@@ -3,6 +3,8 @@ import streamlit as st
 import json
 import random
 from pathlib import Path
+from collections import Counter, defaultdict
+import pandas as pd

 # Path to your JSONL file
 INPUT_FILE = "../../data/results.jsonl"
@@ -127,7 +129,7 @@ st.title("Claim Visualizer")
 # --------------------------
 view = st.sidebar.selectbox(
    "Choose View",
-    ["All Claims", "Single Claim Random", "View Rules"]
+    ["All Claims", "Single Claim Random", "View Rules", "Statistics"]
 )

 # --------------------------
@@ -334,4 +336,135 @@ elif view == "Single Claim Random":

 elif view == "View Rules":
    with open("rules.txt", "r", encoding="utf-8") as f:
-        st.write(f.readlines())
+        st.write(f.read())
+
+elif view == "Statistics":
+
+    st.header("Statistics")
+
+    word_counter = Counter()
+    doc_scores = defaultdict(list)
+    diff_scores = defaultdict(list)
+
+    # ---- collect stats ----
+    for entry in st.session_state.data:
+        doc_url = entry.get("documentUrl")
+
+        for o in entry.get("output", []):
+            for c in o.get("content_parsed", []):
+
+                # ---- extra_info word counts ----
+                extra = c.get("extra_info", "")
+                if extra:
+                    words = extra.strip().split()
+                    word_counter.update(words)
+
+                # ---- human score aggregation ----
+                hs = c.get("human_score")
+                if hs is not None and doc_url:
+                    doc_scores[doc_url].append(hs)
+
+                # ---- diff score aggregation ----
+                s = c.get("score")
+                if hs is not None and s is not None and doc_url:
+                    diff = abs(hs - s)
+                    diff_scores[doc_url].append(diff)
+
+    # ==========================
+    # Extra Info Word Counts
+    # ==========================
+    st.subheader("Extra Info Label Counts")
+
+    if word_counter:
+        df_words = (
+            pd.DataFrame(word_counter.items(), columns=["Label", "Count"])
+            .sort_values("Count", ascending=False)
+        )
+
+        st.dataframe(df_words)
+        st.bar_chart(df_words.set_index("Label"))
+    else:
+        st.info("No extra_info data available yet.")
+
+    # ==========================
+    # Avg Human Score per Document
+    # ==========================
+    st.subheader("Average Human Score per documentUrl")
+
+    avg_scores = []
+
+    for doc, scores in doc_scores.items():
+        if scores:
+            avg_scores.append({
+                "documentUrl": doc,
+                "avg_human_score": sum(scores) / len(scores),
+                "num_events": len(scores)
+            })
+
+    if avg_scores:
+        df_scores = pd.DataFrame(avg_scores).sort_values(
+            "avg_human_score",
+            ascending=False
+        )
+
+        st.dataframe(df_scores)
+        # ==========================
+        # Distribution (rounded to 0.1)
+        # ==========================
+
+        st.subheader("Distribution of Average Human Scores (Rounded to 0.1)")
+
+        # round averages to nearest 0.1
+        df_scores["rounded_score"] = (
+            df_scores["avg_human_score"].round(1)
+        )
+
+        # count how many docs fall into each bucket
+        dist = (
+            df_scores["rounded_score"]
+            .value_counts()
+            .sort_index()
+            .reset_index()
+        )
+
+        dist.columns = ["rounded_score", "count"]
+
+        # ensure all bins from 0.0 → 1.0 exist
+        all_bins = pd.DataFrame({
+            "rounded_score": [round(x * 0.1, 1) for x in range(11)]
+        })
+
+        dist = (
+            all_bins.merge(dist, on="rounded_score", how="left")
+            .fillna(0)
+        )
+
+        dist["count"] = dist["count"].astype(int)
+
+        # plot counts per score bucket
+        st.bar_chart(
+            dist.set_index("rounded_score")["count"]
+        )
+    else:
+        st.info("No human scores available yet.")
+
+    # ==========================
+    # Overall Model vs Human Difference
+    # ==========================
+    st.subheader("Model vs Human Agreement")
+
+    all_diffs = [
+        diff
+        for diffs in diff_scores.values()
+        for diff in diffs
+    ]
+
+    if all_diffs:
+        avg_diff = sum(all_diffs) / len(all_diffs)
+
+        st.write(
+            f"Average absolute difference between model score and human score: "
+            f"**{avg_diff:.3f}**"
+        )
+    else:
+        st.info("No items have both score and human_score yet.")
@@ -1,8 +1,17 @@
 1. Proposed trigger events rewording the initial prompt should be penalised
+
 2. Proposed trigger events should be specific enough to create a reasonable analysis of that specific disinformation spread
+
+    2.1. Proposed trigger events should be verifiable (general ideas of sentiment are not admissible)
+
+    2.2. Proposed trigger events should have a genuine causal link between the event and the disinformation
+
+    2.3. Proposed trigger events should avoid speculation
+
 3. Proposed trigger events should be time-specific. A trigger event that happened after the event is invalid
+
 4. Proposed trigger events should not describe a story of propagation (discussions of debunk articles are invalid)
+
 5. Proposed trigger events should be sufficiently different from one another
+
 6. Proposed trigger events must be free from bias, and backed up by reliable evidence
-7. Proposed trigger events should be verifiable (general ideas of sentiment are not admissable)
-8. Proposed trigger events events should have a genuine causal link between the event and the disinformation