Refactor scorer for future maintainability

2026-02-26 10:25:49 +00:00
parent 6c3aa7343d
commit 201176e71c
11 changed files with 601 additions and 356 deletions
@@ -0,0 +1,18 @@
+import streamlit as st
+
+def page_title() -> str:
+    """Return the title string of this page."""
+    title = "All Claims"
+    return title
+
+def render():
+    """Render every claim of every entry in ``st.session_state.data``.
+
+    Each entry's ``text`` becomes a subheader. Below it, every item found
+    under ``output[*].content_parsed[*]`` is printed field by field and
+    closed with a horizontal rule.
+    """
+    # (heading, dict key, fallback): only "extra_info" falls back to "";
+    # the other fields are rendered as "None" when the key is missing.
+    fields = (
+        ("Event", "event", None),
+        ("Reasoning", "reasoningWhyRelevant", None),
+        ("Score", "score", None),
+        ("Human Score", "human_score", None),
+        ("Extra Info", "extra_info", ""),
+    )
+
+    st.header("All Claims")
+
+    for record in st.session_state.data:
+        st.subheader(record.get("text"))
+
+        for output in record.get("output", []):
+            for claim in output.get("content_parsed", []):
+                for heading, key, fallback in fields:
+                    st.markdown(f"**{heading}:** {claim.get(key, fallback)}")
+                st.markdown("---")
@@ -0,0 +1,75 @@
+import random
+import streamlit as st
+from config import INPUT_FILE
+from data_utils import save_data
+
+
+def page_title() -> str:
+    """Return the title string of this page."""
+    title = "Label"
+    return title
+
+def render():
+    """Render the labelling page for one randomly chosen entry.
+
+    When ``st.session_state.current_claim`` is ``None`` a new bundle
+    (``{"entry": ..., "claims": [...]}``) is picked at random among the
+    entries that still contain claims without a truthy ``"ranked"`` flag.
+    Every claim of the bundle is shown with one checkbox per label.
+
+    The ticked labels are written to the claims (``"extra_info"`` as a
+    space-separated string, ``"ranked"`` set to ``True``) only when
+    "Save Annotation" is pressed. The claim dicts are the same objects that
+    live in ``st.session_state.data``, so writing to them while merely
+    rendering would flag claims as annotated that were only displayed.
+    """
+    # (checkbox caption, tag stored in "extra_info")
+    labels = [
+        ("Rewording", "REWORDING"),
+        ("Not Specific", "NSPECIFIC"),
+        ("Time Incorrect", "TINCORRECT"),
+        ("Story?", "STORY"),
+        ("Duplicate?", "DUPLICATE"),
+        ("Bias Shown", "BIAS"),
+        ("Perfect", "PERFECT"),
+    ]
+
+    if st.session_state.current_claim is None:
+        unannotated = []
+
+        for entry in st.session_state.data:
+            claims = [
+                c
+                for o in entry.get("output", [])
+                for c in o.get("content_parsed", [])
+                if not c.get("ranked")
+            ]
+            if claims:
+                unannotated.append({"entry": entry, "claims": claims})
+
+        if unannotated:
+            st.session_state.current_claim = random.choice(unannotated)
+            st.session_state.drag_order = None
+
+    bundle = st.session_state.current_claim
+
+    if bundle is None:
+        st.info("All items annotated.")
+        return
+
+    entry = bundle["entry"]
+    claims = bundle["claims"]
+
+    st.subheader(entry.get("text"))
+    st.write(entry.get("normalized", ""))
+
+    st.subheader("Annotate Events")
+
+    # (claim, label string) pairs, applied to the data only on save.
+    pending = []
+
+    for idx, c in enumerate(claims):
+        with st.container(border=True):
+            st.markdown(f"**Event:** {c.get('event')}")
+            st.markdown(f"**Reasoning:** {c.get('reasoningWhyRelevant')}")
+
+            cols = st.columns(len(labels))
+            selected = []
+
+            for col, (name, tag) in zip(cols, labels):
+                with col:
+                    if st.checkbox(name, key=f"{tag}{idx}{c.get('event')}"):
+                        selected.append(tag)
+
+            pending.append((c, " ".join(selected)))
+
+    if st.button("Save Annotation"):
+        for c, extra_info in pending:
+            c["extra_info"] = extra_info
+            c["ranked"] = True
+
+        save_data(INPUT_FILE, st.session_state.data)
+        st.session_state.current_claim = None
+        print("Annotation saved")
+        st.rerun()
@@ -0,0 +1,116 @@
+import streamlit as st
+import copy
+import random
+from streamlit_sortables import sort_items
+from config import INPUT_FILE, OUTPUT_FILE
+from data_utils import save_data, save_data_clean
+
+
+def page_title() -> str:
+    """Return the title string of this page."""
+    title = "Rank"
+    return title
+
+def _unranked_perfect_claims(entry):
+    """Return the entry's claims labelled PERFECT that have no rank yet."""
+    return [
+        c
+        for o in entry.get("output", [])
+        for c in o.get("content_parsed", [])
+        # "or" also covers an "extra_info" stored as None.
+        if "PERFECT" in (c.get("extra_info") or "")
+        and not c.get("rank_position")
+    ]
+
+
+def _auto_score_flagged(data):
+    """Fill in ``human_score`` for unscored claims carrying a problem label.
+
+    DUPLICATE claims get 0, any other labelled claim gets half of its
+    ``score`` rounded to three decimals. Claims that already have a
+    ``human_score`` and claims without labels are left untouched.
+    """
+    for entry in data:
+        for o in entry.get("output", []):
+            for c in o.get("content_parsed", []):
+                if c.get("human_score") is not None:
+                    continue
+
+                extra = c.get("extra_info") or ""
+
+                if "DUPLICATE" in extra:
+                    c["human_score"] = 0
+                elif "PERFECT" in extra:
+                    # PERFECT claims receive their score when they are
+                    # ranked; halving them here would penalise claims that
+                    # simply have not been ranked yet.
+                    continue
+                elif extra:
+                    c["human_score"] = round((c.get("score") or 0) * 0.5, 3)
+
+
+def render():
+    """Render the drag-and-drop ranking page for PERFECT-labelled claims.
+
+    A bundle (``{"entry": ..., "claims": [...]}``) is chosen at random among
+    the entries that have unranked PERFECT claims and kept in
+    ``st.session_state.current_bundle``; the current ordering of its claims
+    is kept in ``st.session_state.perfect_order`` as a list of indices.
+
+    On "Submit PERFECT Ranking" every claim of the bundle gets a 1-based
+    ``rank_position`` and ``human_score = 1``, the remaining labelled claims
+    are auto-scored, the data is passed to ``save_data`` and a deep copy to
+    ``save_data_clean``, and the per-bundle session state is cleared.
+    """
+    st.header("Rank PERFECT Events")
+
+    candidates = []
+    for entry in st.session_state.data:
+        perfect = _unranked_perfect_claims(entry)
+        if perfect:
+            candidates.append({"entry": entry, "claims": perfect})
+
+    if not candidates:
+        st.info("No PERFECT events available.")
+        st.stop()
+
+    if "current_bundle" not in st.session_state:
+        st.session_state.current_bundle = random.choice(candidates)
+        # A fresh bundle must not inherit an ordering left over from a
+        # previous one; its indices could be out of range.
+        st.session_state.perfect_order = list(
+            range(len(st.session_state.current_bundle["claims"]))
+        )
+
+    bundle = st.session_state.current_bundle
+    entry = bundle["entry"]
+    claims = bundle["claims"]
+
+    st.subheader(entry.get("text"))
+
+    if "perfect_order" not in st.session_state:
+        st.session_state.perfect_order = list(range(len(claims)))
+
+    order = st.session_state.perfect_order
+
+    # Labels shown in the sortable UI; the position prefix keeps them unique
+    # even when two claims share the same event text.
+    labels = [
+        f"{i+1}. {claims[idx].get('event')}"
+        for i, idx in enumerate(order)
+    ]
+
+    st.markdown("### Drag to reorder:")
+
+    new_labels = sort_items(labels)
+
+    # Translate the reordered labels back into claim indices.
+    if new_labels != labels:
+        index_of_label = dict(zip(labels, order))
+        order = [index_of_label[lbl] for lbl in new_labels]
+        st.session_state.perfect_order = order
+
+    st.markdown("---")
+    for rank, idx in enumerate(order):
+        c = claims[idx]
+        st.markdown(f"**Rank {rank+1}: {c.get('event')}**")
+        st.markdown(c.get("reasoningWhyRelevant"))
+        st.markdown("---")
+
+    if st.button("Submit PERFECT Ranking"):
+        for rank_position, idx in enumerate(order, start=1):
+            claim_obj = claims[idx]
+            claim_obj["rank_position"] = rank_position
+            claim_obj["human_score"] = 1
+
+        _auto_score_flagged(st.session_state.data)
+
+        save_data(INPUT_FILE, st.session_state.data)
+        save_data_clean(
+            OUTPUT_FILE,
+            copy.deepcopy(st.session_state.data),
+        )
+
+        # Reset state so the next run picks a new bundle.
+        del st.session_state.current_bundle
+        del st.session_state.perfect_order
+
+        print("Ranking saved!")
+        st.rerun()
@@ -0,0 +1,9 @@
+import streamlit as st
+
+
+def page_title() -> str:
+    """Return the title string of this page."""
+    title = "View Rules"
+    return title
+
+def render():
+    """Write the UTF-8 text of ``rules.txt`` to the page."""
+    with open("rules.txt", "r", encoding="utf-8") as rules_file:
+        rules_text = rules_file.read()
+
+    st.write(rules_text)
@@ -0,0 +1,42 @@
+from collections import Counter, defaultdict
+import streamlit as st
+import pandas as pd
+
+def page_title() -> str:
+    """Return the title string of this page."""
+    title = "Statistics"
+    return title
+
+def render():
+    """Show how often each ``extra_info`` label occurs across all claims.
+
+    The whitespace-separated words of every claim's ``extra_info`` under
+    ``output[*].content_parsed[*]`` are counted. The counts are displayed as
+    a table sorted by descending count and as a bar chart; when nothing has
+    been labelled yet an info message is shown instead.
+    """
+    st.header("Statistics")
+
+    word_counter = Counter()
+
+    for entry in st.session_state.data:
+        for o in entry.get("output", []):
+            for c in o.get("content_parsed", []):
+                extra = c.get("extra_info", "")
+                if extra:
+                    # split() without arguments already ignores leading
+                    # and trailing whitespace.
+                    word_counter.update(extra.split())
+
+    st.subheader("Extra Info Label Counts")
+
+    if not word_counter:
+        st.info("No extra_info data available yet.")
+        return
+
+    df_words = (
+        pd.DataFrame(word_counter.items(), columns=["Label", "Count"])
+        .sort_values("Count", ascending=False)
+    )
+
+    st.dataframe(df_words)
+    st.bar_chart(df_words.set_index("Label"))