Start refining scorer. Filter data passed to trigger event agent

2026-02-18 15:03:13 +00:00
parent 3f14b61cd4
commit a2cb93b44e
6 changed files with 77 additions and 50 deletions
@@ -1,7 +1,7 @@
 import { GraphNode } from "@langchain/langgraph";
 import { MessagesState } from "../state";
-import { AIMessage, BaseMessage, HumanMessage } from "@langchain/core/messages";
+import { AIMessage, BaseMessage } from "@langchain/core/messages";
-import { rankExampleTriggerEvents, rankNormalizedClaims } from "../tools/retreiveExamples";
+import { rankExampleTriggerEvents } from "../tools/retreiveExamples";
 export const triggerEventSetup: GraphNode<typeof MessagesState> = async (state) => {
  let nc = state?.messages?.at(-1)?.content ?? "" //keep a copy of normalized trigger event. Again two things, womp womp
@@ -10,7 +10,7 @@ export const triggerEventSetup: GraphNode<typeof MessagesState> = async (state)
  let similarityResults = await rankExampleTriggerEvents(state.disinformationTitle)
  let messages : BaseMessage[] = similarityResults.map((item) => {
-    return new AIMessage(`Event: ${item.rawtext}, Claims and given scores: ${item.cleantext}`)
+    return new AIMessage(`- Event: ${item.rawtext} \n\n - Claims and given scores: ${item.cleantext}`)
  })
  return { messages: messages, disinformationTitle: state.disinformationTitle, normalizedClaim: nc };
@@ -2,7 +2,7 @@ You are an agent in a pipeline to analyse disinformation.
 Once the information has been created as below, a dataset can be created to feed a model for prediction, which will improve pre-bunking efforts.  
 There is a false disinformation claim circulating:
-("###NTITLE###
+###NTITLE###
 Produce up-to 5 specific "trigger events" that happened that could have led to the spread of this disinformation.
 Remember the time frame of the disinformation campaign: {{CAMPAIGN_DATE}}
@@ -14,16 +14,14 @@ Include a concise but specific search query that can be looked up on a search en
 Include a url to a source for your trigger event (not a web search, a specific url from a reputable source). Do not use OAI cite; include the url as text in the response.
-If you are referencing another disinformation campaign, provide the specific narrative used, not just sentiment, and ensure it is sufficiently different from the claim we are analysing.
+Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url".
 Use a JSON format with each entry containing "Event,ReasoningWhyRelevant,SearchQuery,Url,IsItselfDisinformation".
 Multiple tool invocations should be requested at once, if applicable.
 Use your abilities to look between the lines and produce some insightful analysis, thinking both short and long term.
 Events will be reordered as part of processing, each statement must stand alone
-The preceeding messages act as examples of previous responses to potentially ficitonal events and scores given, to help understanding the intended quality of analysis
+The preceding messages act as examples of previous responses to potentially fictional events and scores given.
 Analysis should only be completed for proposed events that would garner >0.7 points.
 Let's go through it step by step
@@ -9,7 +9,6 @@ export const ProposedTriggerEvent = z.object({
  ReasoningWhyRelevant: z.string(),
  SearchQuery: z.string(),
  Url: z.url(),
  IsItselfDisinformation: z.boolean(),
  context: z.string().optional(),
  score: z.number().optional()
 })
@@ -257,7 +257,11 @@ async function ensureExampleClaimJsonlLoaded(): Promise<void> {
    jsonlRawtexts.push(text);
-    jsonlCleantexts.push(row.output[0].content);
+    const parsed_content = row.output[0].content_parsed;
    const filtered_content = parsed_content.filter(itm => itm.human_score > 0.5 && itm.score > 0.5)
    jsonlCleantexts.push(JSON.stringify(filtered_content));
    jsonlEmbeddings.push(embedding);
  }
@@ -24,6 +24,7 @@ def load_data(file_path):
                                o["content_parsed"] = json.loads(o["content"])
                            except json.JSONDecodeError:
                                o["content_parsed"] = []
                                print("parse error")
                    data.append(entry)
    return data
@@ -68,59 +69,80 @@ if view == "All Claims":
                st.markdown("---")
 # --------------------------
-# Single Claim Random View
+# Single Claim Random View (Ranking Based)
 # --------------------------
 elif view == "Single Claim Random":
-    # Rebuild the list of unscored claims only when needed
+
    # Select an entry that still has unscored items
    if st.session_state.current_claim is None:
-        single_claims = []
+        unscored_entries = []
        for entry in st.session_state.data:
-            url = entry.get("documentUrl")
+            unscored = []
-            text = entry.get("text")
+
            for o in entry.get("output", []):
                for c in o.get("content_parsed", []):
-                    if "human_score" not in c or c.get("human_score") is None:
+                    if c.get("human_score") is None:
-                        single_claims.append({
+                        unscored.append(c)
-                            "documentUrl": url,
+
-                            "text": text,
+            if unscored:
-                            "event": c.get("event"),
+                unscored_entries.append({
-                            "reasoningWhyRelevant": c.get("reasoningWhyRelevant"),
+                    "entry": entry,
-                            "raw_obj": c  # reference to original object
+                    "claims": unscored
-                        })
+                })
-        if single_claims:
+
-            st.session_state.current_claim = random.choice(single_claims)
+        if unscored_entries:
            st.session_state.current_claim = random.choice(unscored_entries)
        else:
            st.session_state.current_claim = None
-    claim = st.session_state.current_claim
+    bundle = st.session_state.current_claim
-    if claim is None:
+    if bundle is None:
-        st.info("No claims available without a human score.")
+        st.info("No entries remaining without human scores.")
    else:
-        st.subheader(f"{claim['text']}")
+        entry = bundle["entry"]
-        st.markdown(f"**Event:** {claim['event']}")
+        claims = bundle["claims"]
        st.markdown(f"**Reasoning:** {claim['reasoningWhyRelevant']}")
-        # Input for new human score
+        st.subheader(entry.get("text"))
        new_score = st.number_input(
            "Provide a score (0 to 1)",
            min_value=0.0,
            max_value=1.0,
            value=0.5,
            step=0.01,
            format="%.2f"
        )
-        if st.button("Submit Score"):
+        st.write("Rank events (1 = best / most relevant)")
-            # Update the original object
+
-            claim["raw_obj"]["human_score"] = new_score
+        rankings = []
        # Collect rankings
        for i, c in enumerate(claims):
            st.markdown(f"### Event {i+1}")
            st.markdown(f"**Event:** {c.get('event')}")
            st.markdown(f"**Reasoning:** {c.get('reasoningWhyRelevant')}")
            rank = st.number_input(
                f"Rank for event {i+1}",
                min_value=1,
                max_value=len(claims),
                key=f"rank_{i}"
            )
            rankings.append((c, rank))
        if st.button("Submit Ranking"):
            # Sort by rank (ascending = best)
            rankings.sort(key=lambda x: x[1])
            n = len(rankings)
            # Convert ranking -> normalized score
            for idx, (claim_obj, _) in enumerate(rankings):
                if n == 1:
                    score = 1.0
                else:
                    score = 1 - (idx / (n - 1))
                claim_obj["human_score"] = round(score, 3)
            # Save immediately
            save_data(DATA_FILE, st.session_state.data)
-            st.success("Score saved!")
+            st.success("Ranking converted to scores and saved!")
            # Clear current claim so a new one will be selected next run
            st.session_state.current_claim = None
-
+            st.rerun()
            # Rerun app to show a new claim
            st.rerun()
@@ -0,0 +1,4 @@
 1. Proposed trigger events rewording the initial prompt should be penalised
 2. Proposed trigger events should be specific enough to create reasonable analysis
 3. Proposed trigger events should be time-specific. A trigger event that happened after the event is invalid
 4. Proposed trigger events should not describe a story of propagation