Do not enter existing data if it has no good trigger events

This commit is contained in:
William Jeynes
2026-02-22 15:29:48 +00:00
parent d1ab938c0b
commit 2f33338007
2 changed files with 12 additions and 6 deletions
+1
View File
@@ -14,6 +14,7 @@ Final Dissertation Submission Repository
| ├── claims.json # Retrieved claims from dbkf fetcher | ├── claims.json # Retrieved claims from dbkf fetcher
| ├── dev-eng.csv | ├── dev-eng.csv
| ├── train-eng.csv # Normalized disinformation claims in CSV format from CLAN | ├── train-eng.csv # Normalized disinformation claims in CSV format from CLAN
| ├── Iffy.json # Iffy dataset of disinformation domains
| ├── input.jsonl # Response in cleaned format to give as context to agent | ├── input.jsonl # Response in cleaned format to give as context to agent
| ├── ranked.jsonl # Cleaned trigger event response from scorer frontend | ├── ranked.jsonl # Cleaned trigger event response from scorer frontend
| └── results.jsonl # Output from wrapper script, read and modified by scorer | └── results.jsonl # Output from wrapper script, read and modified by scorer
+11 -6
View File
@@ -244,22 +244,27 @@ async function ensureExampleClaimJsonlLoaded(): Promise<void> {
input: stream, input: stream,
crlfDelay: Infinity, crlfDelay: Infinity,
}); });
let skipped = 0;
for await (const line of rl) { for await (const line of rl) {
if (!line.trim()) continue; // skip empty lines if (!line.trim()) continue; // skip empty lines
const row = JSON.parse(line); const row = JSON.parse(line);
const parsed_content = row.events;
const filtered_content = parsed_content.filter(itm => itm.human_score > 0.5 && itm.score > 0.5)
if (filtered_content.length == 0) {
skipped++;
continue;
}
const text = row.text; const text = row.text;
const embedding = await embedText(text); const embedding = await embedText(text);
jsonlRawtexts.push(text); jsonlRawtexts.push(text);
const parsed_content = row.events;
const filtered_content = parsed_content.filter(itm => itm.human_score > 0.5 && itm.score > 0.5)
jsonlCleantexts.push(JSON.stringify(filtered_content)); jsonlCleantexts.push(JSON.stringify(filtered_content));
jsonlEmbeddings.push(embedding); jsonlEmbeddings.push(embedding);
} }
@@ -268,7 +273,7 @@ async function ensureExampleClaimJsonlLoaded(): Promise<void> {
jsonlBM25 = buildBM25(jsonlRawtexts); jsonlBM25 = buildBM25(jsonlRawtexts);
jsonlLoaded = true; jsonlLoaded = true;
logger.info("JSONL ranking done"); logger.info("JSONL ranking done, %s items skipped for having no good events", skipped);
} }