diff --git a/supporting/scorer/.gitignore b/supporting/scorer/.gitignore new file mode 100644 index 0000000..64d49ae --- /dev/null +++ b/supporting/scorer/.gitignore @@ -0,0 +1,216 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +# Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +# poetry.lock +# poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +# pdm.lock +# pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +# pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# Redis +*.rdb +*.aof +*.pid + +# RabbitMQ +mnesia/ +rabbitmq/ +rabbitmq-data/ + +# ActiveMQ +activemq-data/ + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +# .idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ + +# Streamlit +.streamlit/secrets.toml \ No newline at end of file diff --git a/supporting/scorer/config.py b/supporting/scorer/config.py new file mode 100644 index 0000000..6ea9d1e --- /dev/null +++ b/supporting/scorer/config.py @@ -0,0 +1,4 @@ +INPUT_FILE = "../../data/results.jsonl" +OUTPUT_FILE = "../../data/ranked.jsonl" + +PAGE_TITLE = "Claim Visualizer" \ No newline at end of file diff --git a/supporting/scorer/data_utils.py b/supporting/scorer/data_utils.py new file mode 100644 index 0000000..0739010 --- /dev/null +++ b/supporting/scorer/data_utils.py @@ -0,0 +1,76 @@ +import json +from pathlib import Path + +def load_data(file_path): + data = [] + + if Path(file_path).exists(): + with open(file_path, "r", encoding="utf-8") as f: + for line in f: + if not line.strip(): + continue + + entry = json.loads(line) + outputs = entry.get("output", []) + + if isinstance(outputs, dict): + outputs = [outputs] + + for o in outputs: + content = o.get("content") + if content: + try: + o["content_parsed"] = json.loads(content) + except json.JSONDecodeError: + o["content_parsed"] = [] + + entry["output"] = outputs + data.append(entry) + + return data + + +def save_data_clean(file_path, data): + merged = {} + + for entry in data: + events = [] + for o in entry.get("output", []): + if "content_parsed" in o: + events.extend(o["content_parsed"]) + + doc_url = entry.get("documentUrl") + if not doc_url: + continue + + if doc_url not in merged: + new_entry = entry.copy() + new_entry["events"] = events + new_entry.pop("output", None) + new_entry.pop("status", None) + merged[doc_url] = new_entry + else: + merged[doc_url]["events"].extend(events) + + for entry in merged.values(): + entry["events"].sort( + key=lambda e: e.get("human_score", 0), + reverse=True + ) + + with open(file_path, "w", encoding="utf-8") as f: + for entry in merged.values(): + f.write(json.dumps(entry, ensure_ascii=False) + "\n") + + +def save_data(file_path, data): + with open(file_path, "w", encoding="utf-8") as f: + for entry in data: + for o in entry.get("output", []): + if "content_parsed" in o: + o["content"] = json.dumps( + o["content_parsed"], + ensure_ascii=False + ) + f.write(json.dumps(entry, ensure_ascii=False) + "\n") + diff --git a/supporting/scorer/display.py b/supporting/scorer/display.py index 2e6ce31..76806b4 100644 --- a/supporting/scorer/display.py +++ b/supporting/scorer/display.py @@ -1,366 +1,42 @@ -import copy +import importlib +import pkgutil import streamlit as st -import json -import random -from pathlib import Path -from collections import Counter, defaultdict -import pandas as pd -from streamlit_sortables import sort_items - -INPUT_FILE = "../../data/results.jsonl" -OUTPUT_FILE = "../../data/ranked.jsonl" - -# -------------------------- -# Helper functions -# -------------------------- -def load_data(file_path): - data = [] - - if Path(file_path).exists(): - with open(file_path, "r", encoding="utf-8") as f: - for line in f: - if not line.strip(): - continue - - entry = json.loads(line) - outputs = entry.get("output", []) - - if isinstance(outputs, dict): - outputs = [outputs] - - for o in outputs: - content = o.get("content") - if content: - try: - o["content_parsed"] = json.loads(content) - except json.JSONDecodeError: - o["content_parsed"] = [] - - entry["output"] = outputs - data.append(entry) - - return data - - -def save_data_clean(file_path, data): - merged = {} - - for entry in data: - events = [] - for o in entry.get("output", []): - if "content_parsed" in o: - events.extend(o["content_parsed"]) - - doc_url = entry.get("documentUrl") - if not doc_url: - continue - - if doc_url not in merged: - new_entry = entry.copy() - new_entry["events"] = events - new_entry.pop("output", None) - new_entry.pop("status", None) - merged[doc_url] = new_entry - else: - merged[doc_url]["events"].extend(events) - - for entry in merged.values(): - entry["events"].sort( - key=lambda e: e.get("human_score", 0), - reverse=True - ) - - with open(file_path, "w", encoding="utf-8") as f: - for entry in merged.values(): - f.write(json.dumps(entry, ensure_ascii=False) + "\n") - - -def save_data(file_path, data): - with open(file_path, "w", encoding="utf-8") as f: - for entry in data: - for o in entry.get("output", []): - if "content_parsed" in o: - o["content"] = json.dumps( - o["content_parsed"], - ensure_ascii=False - ) - f.write(json.dumps(entry, ensure_ascii=False) + "\n") - - -# -------------------------- -# Session State Init -# -------------------------- -if "data" not in st.session_state: - st.session_state.data = load_data(INPUT_FILE) - -if "current_claim" not in st.session_state: - st.session_state.current_claim = None - -if "drag_order" not in st.session_state: - st.session_state.drag_order = None +from config import PAGE_TITLE +from state import init_state +import views st.set_page_config( - page_title="Claim Visualizer", + page_title=PAGE_TITLE, layout="wide", - initial_sidebar_state="expanded" + initial_sidebar_state="expanded", ) -st.title("Claim Visualizer") -# -------------------------- -# Sidebar -# -------------------------- +init_state() +st.title(PAGE_TITLE) + +def discover_views(): + routes = {} + + for module_info in pkgutil.iter_modules(views.__path__): + module_name = module_info.name + + module = importlib.import_module(f"views.{module_name}") + + if hasattr(module, "render") and hasattr(module, "page_title"): + title = module.page_title() + routes[title] = module.render + + return routes + + +ROUTES = discover_views() + +# optional: stable ordering +ROUTES = dict(sorted(ROUTES.items())) + view = st.sidebar.selectbox( "Choose View", - [ - "All Claims", - "Single Claim Random", - "Rank Perfect Events", - "View Rules", - "Statistics" - ] + list(ROUTES.keys()), ) -# -------------------------- -# View/AllClaims -# -------------------------- -if view == "All Claims": - st.header("All Claims") - for entry in st.session_state.data: - st.subheader(entry.get("text")) - - for o in entry.get("output", []): - for c in o.get("content_parsed", []): - st.markdown(f"**Event:** {c.get('event')}") - st.markdown(f"**Reasoning:** {c.get('reasoningWhyRelevant')}") - st.markdown(f"**Score:** {c.get('score')}") - st.markdown(f"**Human Score:** {c.get('human_score')}") - st.markdown(f"**Extra Info:** {c.get('extra_info', '')}") - st.markdown("---") - -# -------------------------- -# View/Annotate -# -------------------------- -elif view == "Single Claim Random": - - if st.session_state.current_claim is None: - - unannotated = [] - - for entry in st.session_state.data: - claims = [] - - for o in entry.get("output", []): - for c in o.get("content_parsed", []): - if not c.get("ranked"): - claims.append(c) - - if claims: - unannotated.append({"entry": entry, "claims": claims}) - - if unannotated: - st.session_state.current_claim = random.choice(unannotated) - st.session_state.drag_order = None - - bundle = st.session_state.current_claim - - if bundle is None: - st.info("All items annotated.") - else: - entry = bundle["entry"] - claims = bundle["claims"] - - st.subheader(entry.get("text")) - st.write(entry.get("normalized", "")) - - st.subheader("Annotate Events") - - for idx, c in enumerate(claims): - - with st.container(border=True): - - st.markdown(f"**Event:** {c.get('event')}") - st.markdown(f"**Reasoning:** {c.get('reasoningWhyRelevant')}") - - cols = st.columns(7) - temp = "" - - labels = [ - ("Rewording", "REWORDING"), - ("Not Specific", "NSPECIFIC"), - ("Time Incorrect", "TINCORRECT"), - ("Story?", "STORY"), - ("Duplicate?", "DUPLICATE"), - ("Bias Shown", "BIAS"), - ("Perfect", "PERFECT"), - ] - - for i, (name, tag) in enumerate(labels): - with cols[i]: - if st.checkbox(name, key=f"{tag}{idx}{c.get('event')}"): - temp += tag + " " - - c["extra_info"] = temp.strip() - c["ranked"] = True - - if st.button("Save Annotation"): - save_data(INPUT_FILE, st.session_state.data) - st.session_state.current_claim = None - print("Annotation saved") - st.rerun() - -# -------------------------- -# View/Rank -# -------------------------- - -elif view == "Rank Perfect Events": - - st.header("Rank PERFECT Events") - candidates = [] - - for entry in st.session_state.data: - perfect = [] - - for o in entry.get("output", []): - for c in o.get("content_parsed", []): - if "PERFECT" in c.get("extra_info", "") and not c.get("rank_position"): - perfect.append(c) - - if perfect: - candidates.append({"entry": entry, "claims": perfect}) - - if not candidates: - st.info("No PERFECT events available.") - st.stop() - - if "current_bundle" not in st.session_state: - st.session_state.current_bundle = random.choice(candidates) - - bundle = st.session_state.current_bundle - entry = bundle["entry"] - claims = bundle["claims"] - - st.subheader(entry.get("text")) - - # init - if "perfect_order" not in st.session_state: - st.session_state.perfect_order = list(range(len(claims))) - - order = st.session_state.perfect_order - - # labels shown in sortable UI - labels = [ - f"{i+1}. {claims[idx].get('event')}" - for i, idx in enumerate(order) - ] - - st.markdown("### Drag to reorder:") - - # ------------------------- - # Drag & drop UI - # ------------------------- - new_labels = sort_items(labels) - - # Convert reordered labels back → indices - if new_labels != labels: - new_order = [] - for lbl in new_labels: - original_pos = labels.index(lbl) - new_order.append(order[original_pos]) - - st.session_state.perfect_order = new_order - order = new_order - - st.markdown("---") - for rank, idx in enumerate(order): - c = claims[idx] - st.markdown(f"**Rank {rank+1}: {c.get('event')}**") - st.markdown(c.get("reasoningWhyRelevant")) - st.markdown("---") - - if st.button("Submit PERFECT Ranking"): - - n = len(order) - - for rank_position, idx in enumerate(order): - claim_obj = claims[idx] - - # explicit stored rank - claim_obj["rank_position"] = rank_position + 1 - - claim_obj["human_score"] = 1 - - # Auto-scoring - for entry in st.session_state.data: - for o in entry.get("output", []): - for c in o.get("content_parsed", []): - - if c.get("human_score") is not None: - continue - - extra = c.get("extra_info", "") - - if "DUPLICATE" in extra: - c["human_score"] = 0 - elif extra: - c["human_score"] = round( - c.get("score", 0) * 0.5, 3 - ) - - save_data(INPUT_FILE, st.session_state.data) - save_data_clean( - OUTPUT_FILE, - copy.deepcopy(st.session_state.data) - ) - - # reset state for next example - del st.session_state.current_bundle - del st.session_state.perfect_order - - print("Ranking saved!") - st.rerun() - -# -------------------------- -# View/Rules -# -------------------------- -elif view == "View Rules": - with open("rules.txt", "r", encoding="utf-8") as f: - st.write(f.read()) - -# -------------------------- -# View/Statistics -# -------------------------- -elif view == "Statistics": - st.header("Statistics") - - word_counter = Counter() - doc_scores = defaultdict(list) - diff_scores = defaultdict(list) - - # ---- collect stats ---- - for entry in st.session_state.data: - doc_url = entry.get("documentUrl") - - for o in entry.get("output", []): - for c in o.get("content_parsed", []): - - # ---- extra_info word counts ---- - extra = c.get("extra_info", "") - if extra: - words = extra.strip().split() - word_counter.update(words) - - # -------------------------- - # Extra Info Word Counts - # -------------------------- - st.subheader("Extra Info Label Counts") - - if word_counter: - df_words = ( - pd.DataFrame(word_counter.items(), columns=["Label", "Count"]) - .sort_values("Count", ascending=False) - ) - - st.dataframe(df_words) - st.bar_chart(df_words.set_index("Label")) - else: - st.info("No extra_info data available yet.") \ No newline at end of file +ROUTES[view]() \ No newline at end of file diff --git a/supporting/scorer/state.py b/supporting/scorer/state.py new file mode 100644 index 0000000..eacb76c --- /dev/null +++ b/supporting/scorer/state.py @@ -0,0 +1,13 @@ +import streamlit as st +from config import INPUT_FILE +from data_utils import load_data + +def init_state(): + if "data" not in st.session_state: + st.session_state.data = load_data(INPUT_FILE) + + if "current_claim" not in st.session_state: + st.session_state.current_claim = None + + if "drag_order" not in st.session_state: + st.session_state.drag_order = None \ No newline at end of file diff --git a/supporting/scorer/views/__init__.py b/supporting/scorer/views/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/supporting/scorer/views/all_claims.py b/supporting/scorer/views/all_claims.py new file mode 100644 index 0000000..bb45699 --- /dev/null +++ b/supporting/scorer/views/all_claims.py @@ -0,0 +1,18 @@ +import streamlit as st + +def page_title() -> str: + return "All Claims" + +def render(): + st.header("All Claims") + for entry in st.session_state.data: + st.subheader(entry.get("text")) + + for o in entry.get("output", []): + for c in o.get("content_parsed", []): + st.markdown(f"**Event:** {c.get('event')}") + st.markdown(f"**Reasoning:** {c.get('reasoningWhyRelevant')}") + st.markdown(f"**Score:** {c.get('score')}") + st.markdown(f"**Human Score:** {c.get('human_score')}") + st.markdown(f"**Extra Info:** {c.get('extra_info', '')}") + st.markdown("---") \ No newline at end of file diff --git a/supporting/scorer/views/label.py b/supporting/scorer/views/label.py new file mode 100644 index 0000000..dcce216 --- /dev/null +++ b/supporting/scorer/views/label.py @@ -0,0 +1,75 @@ +import random +import streamlit as st +from config import INPUT_FILE +from data_utils import save_data + + +def page_title() -> str: + return "Label" + +def render(): + if st.session_state.current_claim is None: + + unannotated = [] + + for entry in st.session_state.data: + claims = [] + + for o in entry.get("output", []): + for c in o.get("content_parsed", []): + if not c.get("ranked"): + claims.append(c) + + if claims: + unannotated.append({"entry": entry, "claims": claims}) + + if unannotated: + st.session_state.current_claim = random.choice(unannotated) + st.session_state.drag_order = None + + bundle = st.session_state.current_claim + + if bundle is None: + st.info("All items annotated.") + else: + entry = bundle["entry"] + claims = bundle["claims"] + + st.subheader(entry.get("text")) + st.write(entry.get("normalized", "")) + + st.subheader("Annotate Events") + + for idx, c in enumerate(claims): + + with st.container(border=True): + + st.markdown(f"**Event:** {c.get('event')}") + st.markdown(f"**Reasoning:** {c.get('reasoningWhyRelevant')}") + + cols = st.columns(7) + temp = "" + + labels = [ + ("Rewording", "REWORDING"), + ("Not Specific", "NSPECIFIC"), + ("Time Incorrect", "TINCORRECT"), + ("Story?", "STORY"), + ("Duplicate?", "DUPLICATE"), + ("Bias Shown", "BIAS"), + ("Perfect", "PERFECT"), + ] + + for i, (name, tag) in enumerate(labels): + with cols[i]: + if st.checkbox(name, key=f"{tag}{idx}{c.get('event')}"): + temp += tag + " " + + c["extra_info"] = temp.strip() + c["ranked"] = True + + if st.button("Save Annotation"): + save_data(INPUT_FILE, st.session_state.data) + st.session_state.current_claim = None + print("Annotation saved") + st.rerun() \ No newline at end of file diff --git a/supporting/scorer/views/rank.py b/supporting/scorer/views/rank.py new file mode 100644 index 0000000..43a0e1e --- /dev/null +++ b/supporting/scorer/views/rank.py @@ -0,0 +1,116 @@ +import streamlit as st +import copy +import random +from streamlit_sortables import sort_items +from config import INPUT_FILE, OUTPUT_FILE +from data_utils import save_data, save_data_clean + + +def page_title() -> str: + return "Rank" + +def render(): + st.header("Rank PERFECT Events") + candidates = [] + + for entry in st.session_state.data: + perfect = [] + + for o in entry.get("output", []): + for c in o.get("content_parsed", []): + if "PERFECT" in c.get("extra_info", "") and not c.get("rank_position"): + perfect.append(c) + + if perfect: + candidates.append({"entry": entry, "claims": perfect}) + + if not candidates: + st.info("No PERFECT events available.") + st.stop() + + if "current_bundle" not in st.session_state: + st.session_state.current_bundle = random.choice(candidates) + + bundle = st.session_state.current_bundle + entry = bundle["entry"] + claims = bundle["claims"] + + st.subheader(entry.get("text")) + + # init + if "perfect_order" not in st.session_state: + st.session_state.perfect_order = list(range(len(claims))) + + order = st.session_state.perfect_order + + # labels shown in sortable UI + labels = [ + f"{i+1}. {claims[idx].get('event')}" + for i, idx in enumerate(order) + ] + + st.markdown("### Drag to reorder:") + + # ------------------------- + # Drag & drop UI + # ------------------------- + new_labels = sort_items(labels) + + # Convert reordered labels back → indices + if new_labels != labels: + new_order = [] + for lbl in new_labels: + original_pos = labels.index(lbl) + new_order.append(order[original_pos]) + + st.session_state.perfect_order = new_order + order = new_order + + st.markdown("---") + for rank, idx in enumerate(order): + c = claims[idx] + st.markdown(f"**Rank {rank+1}: {c.get('event')}**") + st.markdown(c.get("reasoningWhyRelevant")) + st.markdown("---") + + if st.button("Submit PERFECT Ranking"): + + n = len(order) + + for rank_position, idx in enumerate(order): + claim_obj = claims[idx] + + # explicit stored rank + claim_obj["rank_position"] = rank_position + 1 + + claim_obj["human_score"] = 1 + + # Auto-scoring + for entry in st.session_state.data: + for o in entry.get("output", []): + for c in o.get("content_parsed", []): + + if c.get("human_score") is not None: + continue + + extra = c.get("extra_info", "") + + if "DUPLICATE" in extra: + c["human_score"] = 0 + elif extra: + c["human_score"] = round( + c.get("score", 0) * 0.5, 3 + ) + + save_data(INPUT_FILE, st.session_state.data) + save_data_clean( + OUTPUT_FILE, + copy.deepcopy(st.session_state.data) + ) + + # reset state for next example + del st.session_state.current_bundle + del st.session_state.perfect_order + + print("Ranking saved!") + st.rerun() \ No newline at end of file diff --git a/supporting/scorer/views/rules.py b/supporting/scorer/views/rules.py new file mode 100644 index 0000000..180bc4e --- /dev/null +++ b/supporting/scorer/views/rules.py @@ -0,0 +1,9 @@ +import streamlit as st + + +def page_title() -> str: + return "View Rules" + +def render(): + with open("rules.txt", "r", encoding="utf-8") as f: + st.write(f.read()) \ No newline at end of file diff --git a/supporting/scorer/views/stats.py b/supporting/scorer/views/stats.py new file mode 100644 index 0000000..1a5d620 --- /dev/null +++ b/supporting/scorer/views/stats.py @@ -0,0 +1,42 @@ +from collections import Counter, defaultdict +import streamlit as st +import pandas as pd + +def page_title() -> str: + return "Statistics" + +def render(): + st.header("Statistics") + + word_counter = Counter() + doc_scores = defaultdict(list) + diff_scores = defaultdict(list) + + # ---- collect stats ---- + for entry in st.session_state.data: + doc_url = entry.get("documentUrl") + + for o in entry.get("output", []): + for c in o.get("content_parsed", []): + + # ---- extra_info word counts ---- + extra = c.get("extra_info", "") + if extra: + words = extra.strip().split() + word_counter.update(words) + + # -------------------------- + # Extra Info Word Counts + # -------------------------- + st.subheader("Extra Info Label Counts") + + if word_counter: + df_words = ( + pd.DataFrame(word_counter.items(), columns=["Label", "Count"]) + .sort_values("Count", ascending=False) + ) + + st.dataframe(df_words) + st.bar_chart(df_words.set_index("Label")) + else: + st.info("No extra_info data available yet.") \ No newline at end of file