Redo rules a little bit. Update fetch to retreive only from some sources. Add statistics to display, fix rules display

2026-02-23 21:56:27 +00:00
parent cca3c42f5b
commit 3d0cacd24e
3 changed files with 161 additions and 15 deletions
@@ -9,16 +9,20 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents"
 # Ukraine: http://weverify.eu/resource/Concept/Q212
 # COVID: http://weverify.eu/resource/Concept/Q84263196
-DEFAULT_PARAMS = {
+# "documentTypes": "http://schema.org/Claim",
-    "concept": "http://weverify.eu/resource/Concept/Q212",
+DEFAULT_PARAMS = [
-    # "documentTypes": "http://schema.org/Claim",
+    ("concept", "http://weverify.eu/resource/Concept/Q212"),
-    "from": "2000-01-01",
+    ("from", "2000-01-01"),
-    "to": "2026-02-19",
+    ("to", "2026-02-19"),
-    "lang": "en",
+    ("lang", "en"),
-    "limit": 5000,  # Max per page
+    ("limit", 5000),
-    "page": 1,
+    ("page", 1),
-    "orderBy": "date"
+    ("orderBy", "date"),
-}
+
    # duplicate keys allowed
    ("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
    ("organization", "http://weverify.eu/resource/Organization/c71953fa6cf24ac4178f751c77862070"), # CheckYourFact
 ]
 NUM_RANDOM_CLAIMS = 20
@@ -3,6 +3,8 @@ import streamlit as st
 import json
 import random
 from pathlib import Path
 from collections import Counter, defaultdict
 import pandas as pd
 # Path to your JSONL file
 INPUT_FILE = "../../data/results.jsonl"
@@ -127,7 +129,7 @@ st.title("Claim Visualizer")
 # --------------------------
 view = st.sidebar.selectbox(
    "Choose View",
-    ["All Claims", "Single Claim Random", "View Rules"]
+    ["All Claims", "Single Claim Random", "View Rules", "Statistics"]
 )
 # --------------------------
@@ -334,4 +336,135 @@ elif view == "Single Claim Random":
 elif view == "View Rules":
    with open("rules.txt", "r", encoding="utf-8") as f:
-        st.write(f.readlines())
+        st.write(f.read())
 elif view == "Statistics":
    st.header("Statistics")
    word_counter = Counter()
    doc_scores = defaultdict(list)
    diff_scores = defaultdict(list)
    # ---- collect stats ----
    for entry in st.session_state.data:
        doc_url = entry.get("documentUrl")
        for o in entry.get("output", []):
            for c in o.get("content_parsed", []):
                # ---- extra_info word counts ----
                extra = c.get("extra_info", "")
                if extra:
                    words = extra.strip().split()
                    word_counter.update(words)
                # ---- human score aggregation ----
                hs = c.get("human_score")
                if hs is not None and doc_url:
                    doc_scores[doc_url].append(hs)
                # ---- diff score aggregation ----
                s = c.get("score")
                if hs is not None and s is not None and doc_url:
                    diff = abs(hs - s)
                    diff_scores[doc_url].append(diff)
    # ==========================
    # Extra Info Word Counts
    # ==========================
    st.subheader("Extra Info Label Counts")
    if word_counter:
        df_words = (
            pd.DataFrame(word_counter.items(), columns=["Label", "Count"])
            .sort_values("Count", ascending=False)
        )
        st.dataframe(df_words)
        st.bar_chart(df_words.set_index("Label"))
    else:
        st.info("No extra_info data available yet.")
    # ==========================
    # Avg Human Score per Document
    # ==========================
    st.subheader("Average Human Score per documentUrl")
    avg_scores = []
    for doc, scores in doc_scores.items():
        if scores:
            avg_scores.append({
                "documentUrl": doc,
                "avg_human_score": sum(scores) / len(scores),
                "num_events": len(scores)
            })
    if avg_scores:
        df_scores = pd.DataFrame(avg_scores).sort_values(
            "avg_human_score",
            ascending=False
        )
        st.dataframe(df_scores)
        # ==========================
        # Distribution (rounded to 0.1)
        # ==========================
        st.subheader("Distribution of Average Human Scores (Rounded to 0.1)")
        # round averages to nearest 0.1
        df_scores["rounded_score"] = (
            df_scores["avg_human_score"].round(1)
        )
        # count how many docs fall into each bucket
        dist = (
            df_scores["rounded_score"]
            .value_counts()
            .sort_index()
            .reset_index()
        )
        dist.columns = ["rounded_score", "count"]
        # ensure all bins from 0.0 → 1.0 exist
        all_bins = pd.DataFrame({
            "rounded_score": [round(x * 0.1, 1) for x in range(11)]
        })
        dist = (
            all_bins.merge(dist, on="rounded_score", how="left")
            .fillna(0)
        )
        dist["count"] = dist["count"].astype(int)
        # plot counts per score bucket
        st.bar_chart(
            dist.set_index("rounded_score")["count"]
        )
    else:
        st.info("No human scores available yet.")
    # ==========================
    # Overall Model vs Human Difference
    # ==========================
    st.subheader("Model vs Human Agreement")
    all_diffs = [
        diff
        for diffs in diff_scores.values()
        for diff in diffs
    ]
    if all_diffs:
        avg_diff = sum(all_diffs) / len(all_diffs)
        st.write(
            f"Average absolute difference between model score and human score: "
            f"**{avg_diff:.3f}**"
        )
    else:
        st.info("No items have both score and human_score yet.")
@@ -1,8 +1,17 @@
 1. Proposed trigger events rewording the initial prompt should penalised
 2. Proposed trigger events should be specific enough to create resonable analysis of that specific disinformation spread
    2.1. Proposed trigger events should be verifiable (general ideas of sentiment are not admissable)
    2.2. Proposed trigger events events should have a genuine causal link between the event and the disinformation
    2.3 Proposed trigger events should avoid speculation
 3. Proposed trigger events should be time-specific. A trigger events that happened after the event is invalid
 4. Proposed trigger events should not describe a story of propogation (discussions on debunk articles is invalid)
 5. Proposed trigger events should be sufficiently different from one another
 6. Proposed trigger events must be free from bias, and backed up by reliable evidence
 7. Proposed trigger events should be verifiable (general ideas of sentiment are not admissable)
 8. Proposed trigger events events should have a genuine causal link between the event and the disinformation