Update documentation. Stop storing context. Decide on final claims source
This commit is contained in:
@@ -1,3 +1,15 @@
|
||||
# Classifier work for evaluating model quality
|
||||
|
||||
Made using a dataset of 1000 labeled claims from the MVP pipeline.
|
||||
|
||||
RoBERTa model trained on an augmented dataset with LLM-generated adversarial examples for low-frequency labels.
|
||||
|
||||
Flan model trained using the raw labeled claims; its inherent natural-language ability allows for pattern recognition without the need for fake data.
|
||||
|
||||
Regression model trained using the RoBERTa dataset.
|
||||
|
||||
Used ensemble model in the final version, with the component models available on Hugging Face.
|
||||
|
||||
| Model | % Correct | % Valid taken forward|Used in ensemble|Link
|
||||
|------------------------------------------------------------|-----------|----------------------|----------------|-
|
||||
| Original | 53.22 | 61.72 |
|
||||
|
||||
@@ -16,18 +16,18 @@ BASE_URL = "https://dbkf.ontotext.com/rest-api/search/documents"
|
||||
|
||||
# "documentTypes": "http://schema.org/Claim",
|
||||
DEFAULT_PARAMS = [
|
||||
("concept", "http://weverify.eu/resource/Concept/Q212"),
|
||||
("documentTypes", "http://schema.org/Claim"),
|
||||
("from", "2000-01-01"),
|
||||
("to", "2026-02-19"),
|
||||
("lang", "en"),
|
||||
("limit", 5000),
|
||||
("limit", 7000),
|
||||
("page", 1),
|
||||
("orderBy", "date"),
|
||||
("organization", "http://weverify.eu/resource/Organization/128573c5d49d37558706194e755f152d"), # Science Direct
|
||||
("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
|
||||
("organization", "http://weverify.eu/resource/Organization/c71953fa6cf24ac4178f751c77862070"), # CheckYourFact
|
||||
]
|
||||
|
||||
NUM_RANDOM_CLAIMS = 40
|
||||
NUM_RANDOM_CLAIMS = 200
|
||||
|
||||
INPUT_FILE = "../../data/input.jsonl"
|
||||
OUTPUT_FILE = "../../data/claims.json"
|
||||
|
||||
Reference in New Issue
Block a user