Improve scoring: ignore duplicates, catch under- and over-confidence, and show the difference between "FINE" and "PERFECT".

This commit is contained in:
William Jeynes
2026-03-13 12:18:52 +00:00
parent 8311556855
commit d5c6cb444d
+31 -7
View File
@@ -65,6 +65,9 @@ def render():
st.subheader(f"File: {file_path.name}") st.subheader(f"File: {file_path.name}")
confidence_counter = Counter() confidence_counter = Counter()
overconfident_docs = []
underconfident_docs = []
dup_counter = 0
# ---- Read file line by line ---- # ---- Read file line by line ----
with open(file_path, "r", encoding="utf-8") as f: with open(file_path, "r", encoding="utf-8") as f:
@@ -73,18 +76,24 @@ def render():
entry = json.loads(line) entry = json.loads(line)
except json.JSONDecodeError: except json.JSONDecodeError:
continue continue
doc_id = entry.get("documentUrl", "UNKNOWN")
for event in entry.get("events", []): for event in entry.get("events", []):
score = event.get("score", None) score = event.get("score", None)
extra_lower = (event.get("extra_info", "") or "").strip().lower() extra_lower = (event.get("extra_info", "") or "").strip().lower()
print(extra_lower) # print(extra_lower)
if score is not None: if score is not None:
if score > THRESH and extra_lower == "perfect": if "duplicate" in extra_lower:
confidence_counter["Correct-TRUE"] += 1 dup_counter += 1
elif score > THRESH and extra_lower != "perfect": elif score > THRESH and extra_lower == "perfect":
confidence_counter["Correct-PERFECT"] += 1
elif score > THRESH and extra_lower == "":
confidence_counter["Correct-FINE"] += 1
elif score > THRESH and extra_lower != "perfect" and extra_lower != "":
confidence_counter["Over-confident"] += 1 confidence_counter["Over-confident"] += 1
elif score < THRESH and extra_lower == "perfect": overconfident_docs.append(doc_id)
elif score < THRESH and (extra_lower == "perfect" or extra_lower == ""):
confidence_counter["Under-confident"] += 1 confidence_counter["Under-confident"] += 1
underconfident_docs.append(doc_id)
else: else:
confidence_counter["Correct-FALSE"] += 1 confidence_counter["Correct-FALSE"] += 1
@@ -105,10 +114,25 @@ def render():
ax.set_title(file_path.name) ax.set_title(file_path.name)
total = sum(confidence_counter.values()) total = sum(confidence_counter.values())
correct = confidence_counter["Correct-TRUE"] + confidence_counter["Correct-FALSE"] correct = confidence_counter["Correct-PERFECT"] + confidence_counter["Correct-FINE"] + confidence_counter["Correct-FALSE"]
corr_percent = (correct / total) * 100 corr_percent = (correct / total) * 100
st.markdown(f"**Correct: {corr_percent:.2f}% ({correct}/{total})**") st.markdown(f"**Correct: {corr_percent:.2f}% ({correct}/{total})**")
st.markdown(f"Duplicates: {dup_counter}")
st.pyplot(fig, width=500) st.pyplot(fig, width=500)
st.subheader("Over-Confident Document IDs")
if overconfident_docs:
st.container(height=200).write(sorted(set(overconfident_docs)))
else:
st.info("None")
st.subheader("Under-Confident Document IDs")
if underconfident_docs:
st.container(height=200).write(sorted(set(underconfident_docs)))
else:
st.info("None")
else: else:
st.info("No score data available in this file.") st.info("No score data available in this file.")