Update documentation. Stop storing context. Decide on final claims source

2026-03-25 14:24:55 +00:00
parent 872346c657
commit a7f5978f64
6 changed files with 34 additions and 8 deletions
@@ -1,2 +1,3 @@
 # TEMP
-literature/
+literature/
+backup.tar.gz
@@ -7,6 +7,15 @@ Final Dissertation Submission Repository
 ## Solution Diagram
 -- todo --

+## Classifier Refinement
+[See RAGAS_Service](/supporting/RAGAS_Service/)
+
+## Agent Refinement
+[See agent](/supporting/agent/)
+
+## Generated Database Link and Usage Experiments
+-- todo --
+
 ## Repository Structure
 ```
 ├── run.sh                          # Bash script to run project elements from one place
@@ -0,0 +1,3 @@
+## Refining the agent output
+
+TODO: Tabulate and document experiments
@@ -15,10 +15,11 @@ export const verificationSetup: GraphNode<typeof MessagesState> = async (state)

    for (let i = 0; i < parsed.length; i++) {
      const search = parsed[i].SearchQuery
-      const data = await queryScraper(search);
-      const output = await rankAndDisplayData(data, search);
+      // const data = await queryScraper(search);
+      // const output = await rankAndDisplayData(data, search);

-      parsed[i].context = output;
+      // parsed[i].context = output;
+      parsed[i].context = "NONE"
    }
    
    return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 };
@@ -1,3 +1,15 @@
+# Classifier work for evaluating model quality
+
+Built using a dataset of 1000 labelled claims from the MVP pipeline.
+
+Roberta model trained on an augmented dataset with LLM-generated adversarial examples for low-frequency labels.
+
+Flan model trained using raw labelled claims; its inherent natural language ability allows for pattern recognition without the need for fake data.
+
+Regression model trained using the roberta dataset.
+
+The final version uses an ensemble model, with the component models available on Hugging Face.
+
 | Model                                                      | % Correct | % Valid taken forward|Used in ensemble|Link
 |------------------------------------------------------------|-----------|----------------------|----------------|-
 | Original                                                   | 53.22     | 61.72                |
@@ -16,18 +16,18 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents"

 # "documentTypes": "http://schema.org/Claim",
 DEFAULT_PARAMS = [
-    ("concept", "http://weverify.eu/resource/Concept/Q212"),
+    ("documentTypes", "http://schema.org/Claim"),
    ("from", "2000-01-01"),
    ("to", "2026-02-19"),
    ("lang", "en"),
-    ("limit", 5000),
+    ("limit", 7000),
    ("page", 1),
    ("orderBy", "date"),
+    ("organization", "http://weverify.eu/resource/Organization/128573c5d49d37558706194e755f152d"), # Science Direct
    ("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
-    ("organization", "http://weverify.eu/resource/Organization/c71953fa6cf24ac4178f751c77862070"), # CheckYourFact
 ]

-NUM_RANDOM_CLAIMS = 40
+NUM_RANDOM_CLAIMS = 200

 INPUT_FILE = "../../data/input.jsonl"
 OUTPUT_FILE = "../../data/claims.json"