Refine scoring to allow for better iteration on the frontend. Update generate_adversarial.py.

This commit is contained in:
William Jeynes
2026-03-22 16:04:38 +00:00
parent f4e84af272
commit c69730df6b
3 changed files with 17 additions and 11 deletions
+3 -4
View File
@@ -8,10 +8,9 @@ export const robertaMetrics: GraphNode<typeof MessagesState> = async (state) =>
const result = await evaluateWithRoberta({answer}) const result = await evaluateWithRoberta({answer})
let score = 0;
if (result.validProb > (result.invalidProb+0.4)) { const score = result.validProb - result.invalidProb;
score = 0.7 + ((result.validProb - (result.invalidProb+0.4))*0.3);
}
return { return {
messages: [ new AIMessage("ROBERTA:" + score)] messages: [ new AIMessage("ROBERTA:" + score)]
@@ -24,13 +24,17 @@ def make_request(line):
try: try:
data = json.loads(line) data = json.loads(line)
prompt = ( prompt = (
"Provide a story item for the spread of a disinformation claim" "Provide a non specific piece of background, tallking point or other minformaiton that allowed the a disinformaition to spread; to aid in analysis and debunking"
"that is related to the topic: " "The topic in question is: "
+ data.get("text", "") + data.get("text", "")
+ " Include just the event no other text." + " Include just the example no other text."
+ " A good example would be 'No immediate U.S. government confirmation and nearsimultaneous factchecks/debunks appeared (factchecks published June 26, 2024).' and 'Recycled/old footage of aircraft being shot down previously viral and repeatedly misattributed to the RussiaUkraine war (e.g., 2011 Libya footage reused in 2022)'" + " A good example would be"
+ " If you cannot answer just return an empty string" + "'Existing high-profile reporting and public discussion throughout 20222023 about foreign fighters and mercenary recruitment (including Russian recruitment and Wagner Group activity).'"
+ " Be concise, make no mistakes" + "Since it focusses on non-instantiated or proven discussion points"
+ " and "
+ "'2016 Continued EURussia business dialogues and investments (documented by policy institutes and trade analyses) showing ongoing economic links despite political tensions'"
+ "Since it does not name a specific dialogue or investement"
+ " Be concise, make no mistakes, use similar style and wording to provided examples"
) )
if not prompt: if not prompt:
+4 -1
View File
@@ -5,7 +5,8 @@ import streamlit as st
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
THRESH = 0.7 # THRESH = 0.4
THRESH = 0.6
def page_title() -> str: def page_title() -> str:
return "Statistics" return "Statistics"
@@ -121,6 +122,8 @@ def render():
goodkept = confidence_counter["Correct-PERFECT"] + confidence_counter["Correct-FINE"] goodkept = confidence_counter["Correct-PERFECT"] + confidence_counter["Correct-FINE"]
allkept = confidence_counter["Correct-PERFECT"] + confidence_counter["Correct-FINE"] + confidence_counter["Over-confident"] allkept = confidence_counter["Correct-PERFECT"] + confidence_counter["Correct-FINE"] + confidence_counter["Over-confident"]
if (allkept == 0):
allkept = -1
corr_percent = (correct / total) * 100 corr_percent = (correct / total) * 100
kept_percent = (goodkept / allkept) * 100 kept_percent = (goodkept / allkept) * 100