Update documentation. Stop storing context. Decide on final claims source
This commit is contained in:
+2
-1
@@ -1,2 +1,3 @@
|
||||
# TEMP
|
||||
literature/
|
||||
literature/
|
||||
backup.tar.gz
|
||||
@@ -7,6 +7,15 @@ Final Dissertation Submission Repository
|
||||
## Solution Diagram
|
||||
-- todo --
|
||||
|
||||
## Classifier Refinement
|
||||
[See RAGAS_Service](/supporting/RAGAS_Service/)
|
||||
|
||||
## Agent Refinement
|
||||
[See agent](/supporting/agent/)
|
||||
|
||||
## Generated Database Link and Usage Experiments
|
||||
-- todo --
|
||||
|
||||
## Repository Structure
|
||||
```
|
||||
├── run.sh # Bash script to run project elements from one place
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
## Refining the agent output
|
||||
|
||||
TODO: Table and document experiments
|
||||
@@ -15,10 +15,11 @@ export const verificationSetup: GraphNode<typeof MessagesState> = async (state)
|
||||
|
||||
for (let i = 0; i < parsed.length; i++) {
|
||||
const search = parsed[i].SearchQuery
|
||||
const data = await queryScraper(search);
|
||||
const output = await rankAndDisplayData(data, search);
|
||||
// const data = await queryScraper(search);
|
||||
// const output = await rankAndDisplayData(data, search);
|
||||
|
||||
parsed[i].context = output;
|
||||
// parsed[i].context = output;
|
||||
parsed[i].context = "NONE"
|
||||
}
|
||||
|
||||
return { proposedTriggerEvent: parsed, proposedTriggerEventIndex: 0 };
|
||||
|
||||
@@ -1,3 +1,15 @@
|
||||
# Classifier work for evaluating model quality
|
||||
|
||||
Made using a dataset of 1000 labelled claims from the MVP pipeline.
|
||||
|
||||
RoBERTa model trained on an augmented dataset with LLM-generated adversarial examples for low-frequency labels.
|
||||
|
||||
Flan model trained using raw labelled claims; its inherent natural language ability allows for pattern recognition without the need for fake data.
|
||||
|
||||
Regression model trained using the RoBERTa dataset.
|
||||
|
||||
Used ensemble model in the final version, with the component models available on Hugging Face.
|
||||
|
||||
| Model | % Correct | % Valid taken forward|Used in ensemble|Link
|
||||
|------------------------------------------------------------|-----------|----------------------|----------------|-
|
||||
| Original | 53.22 | 61.72 |
|
||||
|
||||
@@ -16,18 +16,18 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents"
|
||||
|
||||
# "documentTypes": "http://schema.org/Claim",
|
||||
DEFAULT_PARAMS = [
|
||||
("concept", "http://weverify.eu/resource/Concept/Q212"),
|
||||
("documentTypes", "http://schema.org/Claim"),
|
||||
("from", "2000-01-01"),
|
||||
("to", "2026-02-19"),
|
||||
("lang", "en"),
|
||||
("limit", 5000),
|
||||
("limit", 7000),
|
||||
("page", 1),
|
||||
("orderBy", "date"),
|
||||
("organization", "http://weverify.eu/resource/Organization/128573c5d49d37558706194e755f152d"), # Science Direct
|
||||
("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
|
||||
("organization", "http://weverify.eu/resource/Organization/c71953fa6cf24ac4178f751c77862070"), # CheckYourFact
|
||||
]
|
||||
|
||||
NUM_RANDOM_CLAIMS = 40
|
||||
NUM_RANDOM_CLAIMS = 200
|
||||
|
||||
INPUT_FILE = "../../data/input.jsonl"
|
||||
OUTPUT_FILE = "../../data/claims.json"
|
||||
|
||||
Reference in New Issue
Block a user