Add difference between auto scoring system and our own labels
This commit is contained in:
@@ -1,6 +1,9 @@
|
|||||||
from collections import Counter, defaultdict
|
from collections import Counter
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
THRESH = 0.7
|
||||||
|
|
||||||
def page_title() -> str:
|
def page_title() -> str:
|
||||||
return "Statistics"
|
return "Statistics"
|
||||||
@@ -9,22 +12,37 @@ def render():
|
|||||||
st.header("Statistics")
|
st.header("Statistics")
|
||||||
|
|
||||||
word_counter = Counter()
|
word_counter = Counter()
|
||||||
doc_scores = defaultdict(list)
|
confidence_counter = Counter()
|
||||||
diff_scores = defaultdict(list)
|
|
||||||
|
|
||||||
# ---- collect stats ----
|
# ---- collect stats ----
|
||||||
for entry in st.session_state.data:
|
for entry in st.session_state.data:
|
||||||
doc_url = entry.get("documentUrl")
|
|
||||||
|
|
||||||
for o in entry.get("output", []):
|
for o in entry.get("output", []):
|
||||||
for c in o.get("content_parsed", []):
|
for c in o.get("content_parsed", []):
|
||||||
|
|
||||||
# ---- extra_info word counts ----
|
# ---- extra_info word counts ----
|
||||||
extra = c.get("extra_info", "")
|
extra = c.get("extra_info", "")
|
||||||
|
score = c.get("score", None)
|
||||||
|
|
||||||
if extra:
|
if extra:
|
||||||
words = extra.strip().split()
|
words = extra.strip().split()
|
||||||
word_counter.update(words)
|
word_counter.update(words)
|
||||||
|
|
||||||
|
# ---- confidence classification ----
|
||||||
|
if score is not None:
|
||||||
|
extra_lower = extra.strip().lower()
|
||||||
|
|
||||||
|
if score > THRESH and extra_lower == "perfect":
|
||||||
|
confidence_counter["Correct"] += 1
|
||||||
|
|
||||||
|
elif score > THRESH and extra_lower != "perfect":
|
||||||
|
confidence_counter["Over-confident"] += 1
|
||||||
|
|
||||||
|
elif score < THRESH and extra_lower == "perfect":
|
||||||
|
confidence_counter["Under-confident"] += 1
|
||||||
|
|
||||||
|
else:
|
||||||
|
confidence_counter["Other"] += 1
|
||||||
|
|
||||||
# --------------------------
|
# --------------------------
|
||||||
# Extra Info Word Counts
|
# Extra Info Word Counts
|
||||||
# --------------------------
|
# --------------------------
|
||||||
@@ -39,4 +57,29 @@ def render():
|
|||||||
st.dataframe(df_words)
|
st.dataframe(df_words)
|
||||||
st.bar_chart(df_words.set_index("Label"))
|
st.bar_chart(df_words.set_index("Label"))
|
||||||
else:
|
else:
|
||||||
st.info("No extra_info data available yet.")
|
st.info("No extra_info data available yet.")
|
||||||
|
|
||||||
|
# --------------------------
|
||||||
|
# Confidence vs Label Stats
|
||||||
|
# --------------------------
|
||||||
|
st.subheader("Confidence vs Label Distribution")
|
||||||
|
|
||||||
|
if confidence_counter:
|
||||||
|
df_conf = pd.DataFrame(
|
||||||
|
confidence_counter.items(),
|
||||||
|
columns=["Category", "Count"]
|
||||||
|
)
|
||||||
|
|
||||||
|
fig, ax = plt.subplots()
|
||||||
|
ax.pie(
|
||||||
|
df_conf["Count"],
|
||||||
|
labels=df_conf["Category"],
|
||||||
|
autopct="%1.1f%%",
|
||||||
|
startangle=90
|
||||||
|
)
|
||||||
|
ax.axis("equal")
|
||||||
|
|
||||||
|
st.pyplot(fig, width=500)
|
||||||
|
|
||||||
|
else:
|
||||||
|
st.info("No score data available yet.")
|
||||||
Reference in New Issue
Block a user