Do not enter existing data if it has no good trigger events
This commit is contained in:
@@ -14,6 +14,7 @@ Final Dissertation Submission Repository
|
||||
| ├── claims.json # Retrieved claims from dbkf fetcher
|
||||
| ├── dev-eng.csv
|
||||
| ├── train-eng.csv # Normalized disinformation claims in CSV format from CLAN
|
||||
| ├── Iffy.json # Iffy dataset of disinformation domains
|
||||
| ├── input.jsonl # Response in cleaned format to give as context to agent
|
||||
| ├── ranked.jsonl # Cleaned trigger event response from scorer frontend
|
||||
| └── results.jsonl # Output from wrapper script, read and modified by scorer
|
||||
|
||||
@@ -244,22 +244,27 @@ async function ensureExampleClaimJsonlLoaded(): Promise<void> {
|
||||
input: stream,
|
||||
crlfDelay: Infinity,
|
||||
});
|
||||
let skipped = 0;
|
||||
|
||||
for await (const line of rl) {
|
||||
if (!line.trim()) continue; // skip empty lines
|
||||
|
||||
const row = JSON.parse(line);
|
||||
|
||||
const parsed_content = row.events;
|
||||
|
||||
const filtered_content = parsed_content.filter(itm => itm.human_score > 0.5 && itm.score > 0.5)
|
||||
|
||||
if (filtered_content.length == 0) {
|
||||
skipped++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const text = row.text;
|
||||
|
||||
const embedding = await embedText(text);
|
||||
|
||||
jsonlRawtexts.push(text);
|
||||
|
||||
const parsed_content = row.events;
|
||||
|
||||
const filtered_content = parsed_content.filter(itm => itm.human_score > 0.5 && itm.score > 0.5)
|
||||
|
||||
jsonlCleantexts.push(JSON.stringify(filtered_content));
|
||||
jsonlEmbeddings.push(embedding);
|
||||
}
|
||||
@@ -268,7 +273,7 @@ async function ensureExampleClaimJsonlLoaded(): Promise<void> {
|
||||
jsonlBM25 = buildBM25(jsonlRawtexts);
|
||||
|
||||
jsonlLoaded = true;
|
||||
logger.info("JSONL ranking done");
|
||||
logger.info("JSONL ranking done, %s items skipped for having no good events", skipped);
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user