diff --git a/.gitignore b/.gitignore index e274d2c..0f16435 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ # TEMP -literature/ \ No newline at end of file +literature/ +backup.tar.gz \ No newline at end of file diff --git a/README.md b/README.md index 5836923..64e3c50 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,15 @@ Final Dissertation Submission Repository ## Solution Diagram -- todo -- +## Classifier Refinement +[See RAGAS_Service](/supporting/RAGAS_Service/) + +## Agent Refinement +[See agent](/supporting/agent/) + +## Generated Database Link and Usage Experiments +-- todo -- + ## Repository Structure ``` ├── run.sh # Bash script to run project elements from one place diff --git a/agent/README.md b/agent/README.md new file mode 100644 index 0000000..4a77017 --- /dev/null +++ b/agent/README.md @@ -0,0 +1,3 @@ +## Refining the agent output + +TODO: Table and document experiments \ No newline at end of file diff --git a/agent/nodes/verificationSetup.ts b/agent/nodes/verificationSetup.ts index 700b1d3..86fd0d4 100644 --- a/agent/nodes/verificationSetup.ts +++ b/agent/nodes/verificationSetup.ts @@ -15,10 +15,11 @@ export const verificationSetup: GraphNode = async (state) for (let i = 0; i < parsed.length; i++) { const search = parsed[i].SearchQuery - const data = await queryScraper(search); - const output = await rankAndDisplayData(data, search); + // const data = await queryScraper(search); + // const output = await rankAndDisplayData(data, search); - parsed[i].context = output; + // parsed[i].context = output; + parsed[i].context = "NONE" } return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 }; diff --git a/supporting/RAGAS_Service/README.md b/supporting/RAGAS_Service/README.md index 841a0fb..05440b7 100644 --- a/supporting/RAGAS_Service/README.md +++ b/supporting/RAGAS_Service/README.md @@ -1,3 +1,15 @@ +# Classifier work for evaluating model quality + +Made using a dataset of 1000 labeled claims from MVP pipeline. 
+ +Roberta model trained on an augmented dataset with LLM generated adversarial examples for low frequency labels. + +Flan model trained using raw labelled claims; its inherent natural language ability allows for pattern recognition without the need for fake data. + +Regression model trained using the roberta dataset. + +Used ensemble model in the final version, with the component models available on Hugging Face. + | Model | % Correct | % Valid taken forward|Used in ensemble|Link |------------------------------------------------------------|-----------|----------------------|----------------|- | Original | 53.22 | 61.72 | diff --git a/supporting/dbkf/fetch.py b/supporting/dbkf/fetch.py index 99dbfb7..5ca246a 100644 --- a/supporting/dbkf/fetch.py +++ b/supporting/dbkf/fetch.py @@ -16,18 +16,18 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents" # "documentTypes": "http://schema.org/Claim", DEFAULT_PARAMS = [ - ("concept", "http://weverify.eu/resource/Concept/Q212"), + ("documentTypes", "http://schema.org/Claim"), ("from", "2000-01-01"), ("to", "2026-02-19"), ("lang", "en"), - ("limit", 5000), + ("limit", 7000), ("page", 1), ("orderBy", "date"), + ("organization", "http://weverify.eu/resource/Organization/128573c5d49d37558706194e755f152d"), # Science Direct ("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake - ("organization", "http://weverify.eu/resource/Organization/c71953fa6cf24ac4178f751c77862070"), # CheckYourFact ] -NUM_RANDOM_CLAIMS = 40 +NUM_RANDOM_CLAIMS = 200 INPUT_FILE = "../../data/input.jsonl" OUTPUT_FILE = "../../data/claims.json"