Start refining scorer. Filter data passed to trigger event agent
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import { GraphNode } from "@langchain/langgraph";
|
||||
import { MessagesState } from "../state";
|
||||
import { AIMessage, BaseMessage, HumanMessage } from "@langchain/core/messages";
|
||||
import { rankExampleTriggerEvents, rankNormalizedClaims } from "../tools/retreiveExamples";
|
||||
import { AIMessage, BaseMessage } from "@langchain/core/messages";
|
||||
import { rankExampleTriggerEvents } from "../tools/retreiveExamples";
|
||||
|
||||
export const triggerEventSetup: GraphNode<typeof MessagesState> = async (state) => {
|
||||
let nc = state?.messages?.at(-1)?.content ?? "" //keep a copy of normalized trigger event. Again two things, womp womp
|
||||
@@ -10,7 +10,7 @@ export const triggerEventSetup: GraphNode<typeof MessagesState> = async (state)
|
||||
let similarityResults = await rankExampleTriggerEvents(state.disinformationTitle)
|
||||
|
||||
let messages : BaseMessage[] = similarityResults.map((item) => {
|
||||
return new AIMessage(`Event: ${item.rawtext}, Claims and given scores: ${item.cleantext}`)
|
||||
return new AIMessage(`- Event: ${item.rawtext} \n\n - Claims and given scores: ${item.cleantext}`)
|
||||
})
|
||||
|
||||
return { messages: messages, disinformationTitle: state.disinformationTitle, normalizedClaim: nc };
|
||||
|
||||
@@ -2,7 +2,7 @@ You are an agent in a pipeline to analyse disinformation.
|
||||
Once the information has been created as below, a dataset can be created to feed a model for prediction, which will improve pre-bunking efforts.
|
||||
|
||||
There is a false disinformation claim circulating:
|
||||
("###NTITLE###
|
||||
###NTITLE###
|
||||
Produce up to 5 specific "trigger events" that happened that could have led to the spread of this disinformation.
|
||||
|
||||
Remember the time frame of the disinformation campaign: {{CAMPAIGN_DATE}}
|
||||
@@ -14,16 +14,14 @@ Include a concise but specific search query that can be looked up on a search en
|
||||
|
||||
Include a url to a source for your trigger event (not a web search, a specific url from a reputable source). Do not use OAI cite, include url as text in response.
|
||||
|
||||
If you are referencing another disinformation campaign, provide the specific narrative used, not just sentiment, and ensure it is sufficiently different from the claim we are analysing.
|
||||
|
||||
Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url,IsItselfDisinformation".
|
||||
|
||||
Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url".
|
||||
|
||||
Multiple tool invocations should be requested at once, if applicable.
|
||||
Use your abilities to look between the lines and produce some insightful analysis, thinking both short and long term.
|
||||
|
||||
Events will be reordered as part of processing, each statement must stand alone
|
||||
|
||||
The preceding messages act as examples of previous responses to potentially fictional events and scores given, to help understand the intended quality of analysis.
|
||||
The preceding messages act as examples of previous responses to potentially fictional events and scores given.
|
||||
Analysis should only be completed for proposed events that would garner >0.7 points.
|
||||
|
||||
Let's go through it step by step
|
||||
@@ -9,7 +9,6 @@ export const ProposedTriggerEvent = z.object({
|
||||
ReasoningWhyRelevant: z.string(),
|
||||
SearchQuery: z.string(),
|
||||
Url: z.url(),
|
||||
IsItselfDisinformation: z.boolean(),
|
||||
context: z.string().optional(),
|
||||
score: z.number().optional()
|
||||
})
|
||||
|
||||
@@ -257,7 +257,11 @@ async function ensureExampleClaimJsonlLoaded(): Promise<void> {
|
||||
|
||||
jsonlRawtexts.push(text);
|
||||
|
||||
jsonlCleantexts.push(row.output[0].content);
|
||||
const parsed_content = row.output[0].content_parsed;
|
||||
|
||||
const filtered_content = parsed_content.filter(itm => itm.human_score > 0.5 && itm.score > 0.5)
|
||||
|
||||
jsonlCleantexts.push(JSON.stringify(filtered_content));
|
||||
jsonlEmbeddings.push(embedding);
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ def load_data(file_path):
|
||||
o["content_parsed"] = json.loads(o["content"])
|
||||
except json.JSONDecodeError:
|
||||
o["content_parsed"] = []
|
||||
print("parse error")
|
||||
data.append(entry)
|
||||
return data
|
||||
|
||||
@@ -68,59 +69,80 @@ if view == "All Claims":
|
||||
st.markdown("---")
|
||||
|
||||
# --------------------------
|
||||
# Single Claim Random View
|
||||
# Single Claim Random View (Ranking Based)
|
||||
# --------------------------
|
||||
elif view == "Single Claim Random":
|
||||
# Rebuild the list of unscored claims only when needed
|
||||
|
||||
# Select an entry that still has unscored items
|
||||
if st.session_state.current_claim is None:
|
||||
single_claims = []
|
||||
unscored_entries = []
|
||||
|
||||
for entry in st.session_state.data:
|
||||
url = entry.get("documentUrl")
|
||||
text = entry.get("text")
|
||||
unscored = []
|
||||
|
||||
for o in entry.get("output", []):
|
||||
for c in o.get("content_parsed", []):
|
||||
if "human_score" not in c or c.get("human_score") is None:
|
||||
single_claims.append({
|
||||
"documentUrl": url,
|
||||
"text": text,
|
||||
"event": c.get("event"),
|
||||
"reasoningWhyRelevant": c.get("reasoningWhyRelevant"),
|
||||
"raw_obj": c # reference to original object
|
||||
})
|
||||
if single_claims:
|
||||
st.session_state.current_claim = random.choice(single_claims)
|
||||
if c.get("human_score") is None:
|
||||
unscored.append(c)
|
||||
|
||||
if unscored:
|
||||
unscored_entries.append({
|
||||
"entry": entry,
|
||||
"claims": unscored
|
||||
})
|
||||
|
||||
if unscored_entries:
|
||||
st.session_state.current_claim = random.choice(unscored_entries)
|
||||
else:
|
||||
st.session_state.current_claim = None
|
||||
|
||||
claim = st.session_state.current_claim
|
||||
bundle = st.session_state.current_claim
|
||||
|
||||
if claim is None:
|
||||
st.info("No claims available without a human score.")
|
||||
if bundle is None:
|
||||
st.info("No entries remaining without human scores.")
|
||||
else:
|
||||
st.subheader(f"{claim['text']}")
|
||||
st.markdown(f"**Event:** {claim['event']}")
|
||||
st.markdown(f"**Reasoning:** {claim['reasoningWhyRelevant']}")
|
||||
entry = bundle["entry"]
|
||||
claims = bundle["claims"]
|
||||
|
||||
# Input for new human score
|
||||
new_score = st.number_input(
|
||||
"Provide a score (0 to 1)",
|
||||
min_value=0.0,
|
||||
max_value=1.0,
|
||||
value=0.5,
|
||||
step=0.01,
|
||||
format="%.2f"
|
||||
)
|
||||
st.subheader(entry.get("text"))
|
||||
|
||||
if st.button("Submit Score"):
|
||||
# Update the original object
|
||||
claim["raw_obj"]["human_score"] = new_score
|
||||
st.write("Rank events (1 = best / most relevant)")
|
||||
|
||||
rankings = []
|
||||
|
||||
# Collect rankings
|
||||
for i, c in enumerate(claims):
|
||||
st.markdown(f"### Event {i+1}")
|
||||
st.markdown(f"**Event:** {c.get('event')}")
|
||||
st.markdown(f"**Reasoning:** {c.get('reasoningWhyRelevant')}")
|
||||
|
||||
rank = st.number_input(
|
||||
f"Rank for event {i+1}",
|
||||
min_value=1,
|
||||
max_value=len(claims),
|
||||
key=f"rank_{i}"
|
||||
)
|
||||
|
||||
rankings.append((c, rank))
|
||||
|
||||
if st.button("Submit Ranking"):
|
||||
|
||||
# Sort by rank (ascending = best)
|
||||
rankings.sort(key=lambda x: x[1])
|
||||
|
||||
n = len(rankings)
|
||||
|
||||
# Convert ranking -> normalized score
|
||||
for idx, (claim_obj, _) in enumerate(rankings):
|
||||
if n == 1:
|
||||
score = 1.0
|
||||
else:
|
||||
score = 1 - (idx / (n - 1))
|
||||
|
||||
claim_obj["human_score"] = round(score, 3)
|
||||
|
||||
# Save immediately
|
||||
save_data(DATA_FILE, st.session_state.data)
|
||||
st.success("Score saved!")
|
||||
st.success("Ranking converted to scores and saved!")
|
||||
|
||||
# Clear current claim so a new one will be selected next run
|
||||
st.session_state.current_claim = None
|
||||
|
||||
# Rerun app to show a new claim
|
||||
st.rerun()
|
||||
st.rerun()
|
||||
@@ -0,0 +1,4 @@
|
||||
1. Proposed trigger events rewording the initial prompt should be penalised
|
||||
2. Proposed trigger events should be specific enough to create reasonable analysis
|
||||
3. Proposed trigger events should be time-specific. A trigger event that happened after the event is invalid
|
||||
4. Proposed trigger events should not describe a story of propagation
|
||||
Reference in New Issue
Block a user