Update documentation. Stop storing context. Decide on final claims source
This commit is contained in:
+2
-1
@@ -1,2 +1,3 @@
|
|||||||
# TEMP
|
# TEMP
|
||||||
literature/
|
literature/
|
||||||
|
backup.tar.gz
|
||||||
@@ -7,6 +7,15 @@ Final Dissertation Submission Repository
|
|||||||
## Solution Diagram
|
## Solution Diagram
|
||||||
-- todo --
|
-- todo --
|
||||||
|
|
||||||
|
## Classifier Refinement
|
||||||
|
[See RAGAS_Service](/supporting/RAGAS_Service/)
|
||||||
|
|
||||||
|
## Agent Refinement
|
||||||
|
[See agent](/supporting/agent/)
|
||||||
|
|
||||||
|
## Generated Database Link and Usage Experiments
|
||||||
|
-- todo --
|
||||||
|
|
||||||
## Repository Structure
|
## Repository Structure
|
||||||
```
|
```
|
||||||
├── run.sh # Bash script to run project elements from one place
|
├── run.sh # Bash script to run project elements from one place
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
## Refining the agent output
|
||||||
|
|
||||||
|
TODO: Table and document experiments
|
||||||
@@ -15,10 +15,11 @@ export const verificationSetup: GraphNode<typeof MessagesState> = async (state)
|
|||||||
|
|
||||||
for (let i = 0; i < parsed.length; i++) {
|
for (let i = 0; i < parsed.length; i++) {
|
||||||
const search = parsed[i].SearchQuery
|
const search = parsed[i].SearchQuery
|
||||||
const data = await queryScraper(search);
|
// const data = await queryScraper(search);
|
||||||
const output = await rankAndDisplayData(data, search);
|
// const output = await rankAndDisplayData(data, search);
|
||||||
|
|
||||||
parsed[i].context = output;
|
// parsed[i].context = output;
|
||||||
|
parsed[i].context = "NONE"
|
||||||
}
|
}
|
||||||
|
|
||||||
return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 };
|
return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 };
|
||||||
|
|||||||
@@ -1,3 +1,15 @@
|
|||||||
|
# Classifier work for evaluating model quality
|
||||||
|
|
||||||
|
Made using a dataset of 1000 labeled claims from the MVP pipeline.
|
||||||
|
|
||||||
|
Roberta model trained on an augmented dataset with LLM-generated adversarial examples for low-frequency labels.
|
||||||
|
|
||||||
|
Flan model trained using raw labeled claims; its inherent natural language ability allows for pattern recognition without the need for fake data.
|
||||||
|
|
||||||
|
Regression model trained using the Roberta dataset.
|
||||||
|
|
||||||
|
Used ensemble model in the final version, with the component models available on Hugging Face.
|
||||||
|
|
||||||
| Model | % Correct | % Valid taken forward|Used in ensemble|Link
|
| Model | % Correct | % Valid taken forward|Used in ensemble|Link
|
||||||
|------------------------------------------------------------|-----------|----------------------|----------------|-
|
|------------------------------------------------------------|-----------|----------------------|----------------|-
|
||||||
| Original | 53.22 | 61.72 |
|
| Original | 53.22 | 61.72 |
|
||||||
|
|||||||
@@ -16,18 +16,18 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents"
|
|||||||
|
|
||||||
# "documentTypes": "http://schema.org/Claim",
|
# "documentTypes": "http://schema.org/Claim",
|
||||||
DEFAULT_PARAMS = [
|
DEFAULT_PARAMS = [
|
||||||
("concept", "http://weverify.eu/resource/Concept/Q212"),
|
("documentTypes", "http://schema.org/Claim"),
|
||||||
("from", "2000-01-01"),
|
("from", "2000-01-01"),
|
||||||
("to", "2026-02-19"),
|
("to", "2026-02-19"),
|
||||||
("lang", "en"),
|
("lang", "en"),
|
||||||
("limit", 5000),
|
("limit", 7000),
|
||||||
("page", 1),
|
("page", 1),
|
||||||
("orderBy", "date"),
|
("orderBy", "date"),
|
||||||
|
("organization", "http://weverify.eu/resource/Organization/128573c5d49d37558706194e755f152d"), # Science Direct
|
||||||
("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
|
("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
|
||||||
("organization", "http://weverify.eu/resource/Organization/c71953fa6cf24ac4178f751c77862070"), # CheckYourFact
|
|
||||||
]
|
]
|
||||||
|
|
||||||
NUM_RANDOM_CLAIMS = 40
|
NUM_RANDOM_CLAIMS = 200
|
||||||
|
|
||||||
INPUT_FILE = "../../data/input.jsonl"
|
INPUT_FILE = "../../data/input.jsonl"
|
||||||
OUTPUT_FILE = "../../data/claims.json"
|
OUTPUT_FILE = "../../data/claims.json"
|
||||||
|
|||||||
Reference in New Issue
Block a user