Update documentation. Stop storing context. Decide on final claims source

This commit is contained in:
William Jeynes
2026-03-25 14:24:55 +00:00
parent 872346c657
commit a7f5978f64
6 changed files with 34 additions and 8 deletions
+2 -1
View File
@@ -1,2 +1,3 @@
# TEMP # TEMP
literature/ literature/
backup.tar.gz
+9
View File
@@ -7,6 +7,15 @@ Final Dissertation Submission Repository
## Solution Diagram ## Solution Diagram
-- todo -- -- todo --
## Classifier Refinement
[See RAGAS_Service](/supporting/RAGAS_Service/)
## Agent Refinement
[See agent](/supporting/agent/)
## Generated Database Link and Usage Experiments
-- todo --
## Repository Structure ## Repository Structure
``` ```
├── run.sh # Bash script to run project elements from one place ├── run.sh # Bash script to run project elements from one place
+3
View File
@@ -0,0 +1,3 @@
## Refining the agent output
TODO: Tabulate and document experiments
+4 -3
View File
@@ -15,10 +15,11 @@ export const verificationSetup: GraphNode<typeof MessagesState> = async (state)
for (let i = 0; i < parsed.length; i++) { for (let i = 0; i < parsed.length; i++) {
const search = parsed[i].SearchQuery const search = parsed[i].SearchQuery
const data = await queryScraper(search); // const data = await queryScraper(search);
const output = await rankAndDisplayData(data, search); // const output = await rankAndDisplayData(data, search);
parsed[i].context = output; // parsed[i].context = output;
parsed[i].context = "NONE"
} }
return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 }; return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 };
+12
View File
@@ -1,3 +1,15 @@
# Classifier work for evaluating model quality
Made using a dataset of 1000 labelled claims from the MVP pipeline.
RoBERTa model trained on an augmented dataset with LLM-generated adversarial examples for low-frequency labels.
Flan model trained using raw labelled claims; its inherent natural language ability allows for pattern recognition without the need for fake data.
Regression model trained using the RoBERTa dataset.
Used ensemble model in the final version, with the component models available on Hugging Face.
| Model | % Correct | % Valid taken forward|Used in ensemble|Link | Model | % Correct | % Valid taken forward|Used in ensemble|Link
|------------------------------------------------------------|-----------|----------------------|----------------|- |------------------------------------------------------------|-----------|----------------------|----------------|-
| Original | 53.22 | 61.72 | | Original | 53.22 | 61.72 |
+4 -4
View File
@@ -16,18 +16,18 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents"
# "documentTypes": "http://schema.org/Claim", # "documentTypes": "http://schema.org/Claim",
DEFAULT_PARAMS = [ DEFAULT_PARAMS = [
("concept", "http://weverify.eu/resource/Concept/Q212"), ("documentTypes", "http://schema.org/Claim"),
("from", "2000-01-01"), ("from", "2000-01-01"),
("to", "2026-02-19"), ("to", "2026-02-19"),
("lang", "en"), ("lang", "en"),
("limit", 5000), ("limit", 7000),
("page", 1), ("page", 1),
("orderBy", "date"), ("orderBy", "date"),
("organization", "http://weverify.eu/resource/Organization/128573c5d49d37558706194e755f152d"), # Science Direct
("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake ("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
("organization", "http://weverify.eu/resource/Organization/c71953fa6cf24ac4178f751c77862070"), # CheckYourFact
] ]
NUM_RANDOM_CLAIMS = 40 NUM_RANDOM_CLAIMS = 200
INPUT_FILE = "../../data/input.jsonl" INPUT_FILE = "../../data/input.jsonl"
OUTPUT_FILE = "../../data/claims.json" OUTPUT_FILE = "../../data/claims.json"