Add filtering from known disinformation sources

2026-02-22 15:14:58 +00:00
parent 8ffe8dec82
commit d1ab938c0b
6 changed files with 90 additions and 8 deletions
@@ -11,16 +11,16 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents"

 DEFAULT_PARAMS = {
    "concept": "http://weverify.eu/resource/Concept/Q212",
-    "documentTypes": "http://schema.org/Claim",
+    # "documentTypes": "http://schema.org/Claim",
    "from": "2000-01-01",
-    "to": "2026-10-17",
+    "to": "2026-02-19",
    "lang": "en",
-    "limit": 300,  # Max per page
+    "limit": 5000,  # Max per page
    "page": 1,
    "orderBy": "date"
 }

-NUM_RANDOM_CLAIMS = 10
+NUM_RANDOM_CLAIMS = 20

 OUTPUT_FILE = "../../data/claims.json"

@@ -1,3 +1,4 @@
+import copy
 import streamlit as st
 import json
 import random
@@ -154,6 +155,7 @@ elif view == "Single Claim Random":
        claims = bundle["claims"]

        st.subheader(entry.get("text"))
+        st.write(entry.get("normalized", ""))

        # --------------------------
        # Stable Drag IDs (FIX)
@@ -267,7 +269,7 @@ elif view == "Single Claim Random":
                claim_obj["human_score"] = round(score, 3)

            save_data(INPUT_FILE, st.session_state.data)
-            save_data_clean(OUTPUT_FILE, st.session_state.data)
+            save_data_clean(OUTPUT_FILE, copy.deepcopy(st.session_state.data))


            print("Ranking converted to scores and saved!")