Update documentation. Stop storing context. Decide on final claims source

2026-03-25 14:24:55 +00:00
parent 872346c657
commit a7f5978f64
6 changed files with 34 additions and 8 deletions
@@ -1,2 +1,3 @@
 # TEMP
-literature/
+literature/
 backup.tar.gz
@@ -7,6 +7,15 @@ Final Dissertation Submission Repository
 ## Solution Diagram
 -- todo --
 ## Classifier Refinement
 [See RAGAS_Service](/supporting/RAGAS_Service/)
 ## Agent Refinement
 [See agent](/supporting/agent/)
 ## Generated Database Link and Usage Experiments
 -- todo --
 ## Repository Structure
 ```
 ├── run.sh                          # Bash script to run project elements from one place
@@ -0,0 +1,3 @@
 ## Refining the agent output
 TODO: Table and document experiments
@@ -15,10 +15,11 @@ export const verificationSetup: GraphNode<typeof MessagesState> = async (state)
    for (let i = 0; i < parsed.length; i++) {
      const search = parsed[i].SearchQuery
-      const data = await queryScraper(search);
+      // const data = await queryScraper(search);
-      const output = await rankAndDisplayData(data, search);
+      // const output = await rankAndDisplayData(data, search);
-      parsed[i].context = output;
+      // parsed[i].context = output;
      parsed[i].context = "NONE"
    }
    return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 };
@@ -1,3 +1,15 @@
 # Classifier work for evaluating model quality
 Made using a dataset of 1000 labeled claims from MVP pipeline.
 RoBERTa model trained on an augmented dataset with LLM-generated adversarial examples for low-frequency labels.
 Flan model trained using raw labelled claims; its inherent natural-language ability allows for pattern recognition without the need for synthetic data.
 Regression model trained using the same augmented dataset as the RoBERTa model.
 The ensemble model is used in the final version, with the component models available on Hugging Face.
 | Model                                                      | % Correct | % Valid taken forward|Used in ensemble|Link|
 |------------------------------------------------------------|-----------|----------------------|----------------|----|
 | Original                                                   | 53.22     | 61.72                |
@@ -16,18 +16,18 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents"
 # "documentTypes": "http://schema.org/Claim",
 DEFAULT_PARAMS = [
-    ("concept", "http://weverify.eu/resource/Concept/Q212"),
+    ("documentTypes", "http://schema.org/Claim"),
    ("from", "2000-01-01"),
    ("to", "2026-02-19"),
    ("lang", "en"),
-    ("limit", 5000),
+    ("limit", 7000),
    ("page", 1),
    ("orderBy", "date"),
    ("organization", "http://weverify.eu/resource/Organization/128573c5d49d37558706194e755f152d"), # Science Direct
    ("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
    ("organization", "http://weverify.eu/resource/Organization/c71953fa6cf24ac4178f751c77862070"), # CheckYourFact
 ]
-NUM_RANDOM_CLAIMS = 40
+NUM_RANDOM_CLAIMS = 200
 INPUT_FILE = "../../data/input.jsonl"
 OUTPUT_FILE = "../../data/claims.json"
		`@@ -0,0 +1,3 @@`
							`## Refining the agent output`

							`TODO: Table and document experiments`