Add filtering from known disinformation sources

This commit is contained in:
William Jeynes
2026-02-22 15:14:58 +00:00
parent 8ffe8dec82
commit d1ab938c0b
6 changed files with 90 additions and 8 deletions
+55
View File
@@ -0,0 +1,55 @@
import path from "path";
import { logger } from "../utils/logger"
import fs from "fs";
import { parse } from "tldts";
/**
 * One entry as it appears in the site-list JSON file.
 * Both fields are declared as strings; `Score` is converted to a number on load.
 */
interface RawSiteRecord {
  Domain: string;
  Score: string;
}

/** Normalised in-memory form of a site-list entry, with the score as a number. */
interface SiteScore {
  domain: string;
  score: number;
}
// NOTE(review): path.resolve() resolves a relative path against the process
// working directory, not this module's directory, so this only finds the file
// when the process is started from the expected folder — confirm the launch
// directory or anchor the path to the module location.
const FILE_PATH = "../data/Iffy.json";

/**
 * Reads the site list from `FILE_PATH` and converts it to `SiteScore` entries.
 *
 * Entries that are not objects, or that lack a non-empty string `Domain`, are
 * skipped instead of producing `undefined` domains (or throwing on `null`).
 * Domains are trimmed and lower-cased so they can be compared with strict
 * equality against normalised hostnames. `Score` is converted with `Number()`,
 * so a missing or non-numeric score becomes `NaN` while the domain is kept.
 *
 * @returns The usable entries, in file order.
 * @throws Error if the file's top-level JSON value is not an array. Errors from
 *   reading the file or from `JSON.parse` propagate unchanged.
 */
function parseSiteScores(): SiteScore[] {
  const raw = fs.readFileSync(path.resolve(FILE_PATH), "utf-8");
  const data: unknown = JSON.parse(raw);
  if (!Array.isArray(data)) {
    throw new Error("Invalid JSON: expected array");
  }

  const result: SiteScore[] = [];
  for (const item of data as unknown[]) {
    // JSON arrays may contain null or primitives; only objects can carry fields.
    if (typeof item !== "object" || item === null) {
      continue;
    }
    const { Domain, Score } = item as Partial<
      Record<keyof RawSiteRecord, unknown>
    >;
    if (typeof Domain !== "string") {
      continue;
    }
    // Domain names are case-insensitive, so store one canonical form.
    const domain = Domain.trim().toLowerCase();
    if (domain === "") {
      continue;
    }
    result.push({ domain, score: Number(Score) });
  }
  return result;
}
/** Site list cache; stays `null` until the first `checkDisinfo` call loads it. */
let scores: SiteScore[] | null = null;

/**
 * Reports whether a URL's registrable domain appears in the loaded site list.
 *
 * The list is loaded via `parseSiteScores()` on the first call that reaches the
 * lookup and reused afterwards. A match is logged as a warning.
 *
 * @param url - URL to check. A missing, non-string, or blank value is treated
 *   as "not listed" rather than being handed to the parser.
 * @returns `true` when the URL's domain is in the list, otherwise `false`.
 * @throws Whatever `parseSiteScores()` throws if the list cannot be loaded.
 */
export function checkDisinfo(url: string): boolean {
  // Scraper results are untyped at runtime, so the URL may be absent despite
  // the declared parameter type.
  if (typeof url !== "string" || url.trim() === "") {
    return false;
  }

  if (scores === null) {
    scores = parseSiteScores();
  }

  const domain = parse(url).domain;
  // No registrable domain could be extracted. Bail out explicitly so a null
  // domain can never be matched against an entry with a missing domain.
  if (domain == null) {
    return false;
  }

  const match = scores.find((entry) => entry.domain === domain);
  if (match === undefined) {
    return false;
  }

  logger.warn("Bad source %s detected with score %s", url, match.score);
  return true;
}
// Manual smoke test (uncomment to run): console.log(checkDisinfo("http://www.zerohedge.com"))
+5
View File
@@ -1,4 +1,5 @@
import axios from "axios";
import { checkDisinfo } from "./checkDisinfo";
export async function queryScraper(query: string): Promise<string[]> {
const instance = process.env.SCRAPER_INSTANCE;
@@ -44,6 +45,10 @@ export async function queryScraper(query: string): Promise<string[]> {
const context = data.web ?? [];
const lines: string[] = context.map((item: any) => {
if (checkDisinfo(item.url)) {
return "";
}
const title = (item.title ?? "").trim();
const desc = (item.description ?? "").trim();
const link = (item.url ?? "").trim();