Do not enter existing data if it has no good trigger events
This commit is contained in:
@@ -14,6 +14,7 @@ Final Dissertation Submission Repository
|
|||||||
| ├── claims.json # Retrieved claims from dbkf fetcher
|
| ├── claims.json # Retrieved claims from dbkf fetcher
|
||||||
| ├── dev-eng.csv
|
| ├── dev-eng.csv
|
||||||
| ├── train-eng.csv # Normalized disinformation claims in CSV format from CLAN
|
| ├── train-eng.csv # Normalized disinformation claims in CSV format from CLAN
|
||||||
|
| ├── Iffy.json # Iffy dataset of disinformation domains
|
||||||
| ├── input.jsonl # Response in cleaned format to give as context to agent
|
| ├── input.jsonl # Response in cleaned format to give as context to agent
|
||||||
| ├── ranked.jsonl # Cleaned trigger event response from scorer frontend
|
| ├── ranked.jsonl # Cleaned trigger event response from scorer frontend
|
||||||
| └── results.jsonl # Output from wrapper script, read and modified by scorer
|
| └── results.jsonl # Output from wrapper script, read and modified by scorer
|
||||||
|
|||||||
@@ -244,22 +244,27 @@ async function ensureExampleClaimJsonlLoaded(): Promise<void> {
|
|||||||
input: stream,
|
input: stream,
|
||||||
crlfDelay: Infinity,
|
crlfDelay: Infinity,
|
||||||
});
|
});
|
||||||
|
let skipped = 0;
|
||||||
|
|
||||||
for await (const line of rl) {
|
for await (const line of rl) {
|
||||||
if (!line.trim()) continue; // skip empty lines
|
if (!line.trim()) continue; // skip empty lines
|
||||||
|
|
||||||
const row = JSON.parse(line);
|
const row = JSON.parse(line);
|
||||||
|
|
||||||
|
const parsed_content = row.events;
|
||||||
|
|
||||||
|
const filtered_content = parsed_content.filter(itm => itm.human_score > 0.5 && itm.score > 0.5)
|
||||||
|
|
||||||
|
if (filtered_content.length == 0) {
|
||||||
|
skipped++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const text = row.text;
|
const text = row.text;
|
||||||
|
|
||||||
const embedding = await embedText(text);
|
const embedding = await embedText(text);
|
||||||
|
|
||||||
jsonlRawtexts.push(text);
|
jsonlRawtexts.push(text);
|
||||||
|
|
||||||
const parsed_content = row.events;
|
|
||||||
|
|
||||||
const filtered_content = parsed_content.filter(itm => itm.human_score > 0.5 && itm.score > 0.5)
|
|
||||||
|
|
||||||
jsonlCleantexts.push(JSON.stringify(filtered_content));
|
jsonlCleantexts.push(JSON.stringify(filtered_content));
|
||||||
jsonlEmbeddings.push(embedding);
|
jsonlEmbeddings.push(embedding);
|
||||||
}
|
}
|
||||||
@@ -268,7 +273,7 @@ async function ensureExampleClaimJsonlLoaded(): Promise<void> {
|
|||||||
jsonlBM25 = buildBM25(jsonlRawtexts);
|
jsonlBM25 = buildBM25(jsonlRawtexts);
|
||||||
|
|
||||||
jsonlLoaded = true;
|
jsonlLoaded = true;
|
||||||
logger.info("JSONL ranking done");
|
logger.info("JSONL ranking done, %s items skipped for having no good events", skipped);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user