Add file logging for errors. Add exponential backoff retry to web search. On failed web search, do not crash pipeline, return placeholder text to language model

This commit is contained in:
William Jeynes
2026-02-24 13:05:35 +00:00
parent 3d0cacd24e
commit 8317fd85df
5 changed files with 62 additions and 14 deletions
+1
View File
@@ -12,6 +12,7 @@ Final Dissertation Submission Repository
├── run.sh # Bash script to run project elements from one place ├── run.sh # Bash script to run project elements from one place
├── data/ # Holder for project data ├── data/ # Holder for project data
| ├── blocked.jsonl # Web search results blocked by the Iffy list | ├── blocked.jsonl # Web search results blocked by the Iffy list
| ├── error.jsonl # Log file (JSON lines) containing critical exceptions
| ├── claims.json # Retrieved claims from dbkf fetcher | ├── claims.json # Retrieved claims from dbkf fetcher
| ├── dev-eng.csv | ├── dev-eng.csv
| ├── train-eng.csv # Normalized disinformation claims in CSV format from CLAN | ├── train-eng.csv # Normalized disinformation claims in CSV format from CLAN
+7
View File
@@ -18,6 +18,7 @@
"compute-cosine-similarity": "^1.1.0", "compute-cosine-similarity": "^1.1.0",
"csv-parse": "^6.1.0", "csv-parse": "^6.1.0",
"dotenv": "^17.2.3", "dotenv": "^17.2.3",
"exponential-backoff": "^3.1.3",
"fs": "^0.0.1-security", "fs": "^0.0.1-security",
"langchain": "^1.2.14", "langchain": "^1.2.14",
"selenium-webdriver": "^4.40.0", "selenium-webdriver": "^4.40.0",
@@ -1766,6 +1767,12 @@
"integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==",
"license": "MIT" "license": "MIT"
}, },
"node_modules/exponential-backoff": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/exponential-backoff/-/exponential-backoff-3.1.3.tgz",
"integrity": "sha512-ZgEeZXj30q+I0EN+CbSSpIyPaJ5HVQD18Z1m+u1FXbAeT94mr1zw50q4q6jiiC447Nl/YTcIYSAftiGqetwXCA==",
"license": "Apache-2.0"
},
"node_modules/fecha": { "node_modules/fecha": {
"version": "4.2.3", "version": "4.2.3",
"resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz", "resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz",
+1
View File
@@ -22,6 +22,7 @@
"compute-cosine-similarity": "^1.1.0", "compute-cosine-similarity": "^1.1.0",
"csv-parse": "^6.1.0", "csv-parse": "^6.1.0",
"dotenv": "^17.2.3", "dotenv": "^17.2.3",
"exponential-backoff": "^3.1.3",
"fs": "^0.0.1-security", "fs": "^0.0.1-security",
"langchain": "^1.2.14", "langchain": "^1.2.14",
"selenium-webdriver": "^4.40.0", "selenium-webdriver": "^4.40.0",
+29 -5
View File
@@ -1,8 +1,30 @@
import axios from "axios"; import axios from "axios";
import { checkDisinfo } from "./checkDisinfo"; import { checkDisinfo } from "./checkDisinfo";
import { writeToJSONL } from "../utils/writeToJSONL"; import { writeToJSONL } from "../utils/writeToJSONL";
import { backOff } from "exponential-backoff";
import { logger } from "../utils/logger";
/**
 * Runs a web search for `query`, retrying failed attempts with exponential
 * backoff, and never throws to the caller.
 *
 * Up to 10 attempts are made via `queryScraperWorker`, starting at a 500 ms
 * delay that doubles each time (full jitter, capped at 50 s). If every attempt
 * fails, the failure reason is logged and a placeholder result is returned so
 * the surrounding pipeline keeps running.
 *
 * @param query - The search query forwarded to the scraper.
 * @returns The formatted search result lines, or `["API EXCEPTION"]` when the
 *          search could not be completed.
 */
export async function queryScraper(query: string): Promise<string[]> {
    try {
        return await backOff(() => queryScraperWorker(query), {
            numOfAttempts: 10,
            startingDelay: 500,
            timeMultiple: 2,
            jitter: "full",
            maxDelay: 50000,
            // A missing SCRAPER_INSTANCE is a configuration error that will not
            // fix itself between attempts, so give up immediately instead of
            // sleeping through the whole backoff schedule.
            retry: (e: unknown) =>
                !(e instanceof Error && e.message.includes("SCRAPER_INSTANCE environment variable is not set")),
        });
    } catch (err: unknown) {
        // Include the underlying reason so the log shows why the search failed.
        const reason = err instanceof Error ? err.message : String(err);
        logger.error(`Failed out of retry loop, returning placeholder to pipeline: ${reason}`);
        return ["API EXCEPTION"];
    }
}
async function queryScraperWorker(query: string): Promise<string[]> {
const instance = process.env.SCRAPER_INSTANCE; const instance = process.env.SCRAPER_INSTANCE;
if (!instance) { if (!instance) {
throw new Error("SCRAPER_INSTANCE environment variable is not set"); throw new Error("SCRAPER_INSTANCE environment variable is not set");
@@ -28,9 +50,9 @@ export async function queryScraper(query: string): Promise<string[]> {
response = await axios.get(url, { params }); response = await axios.get(url, { params });
} catch (err: any) { } catch (err: any) {
if (err.response) { if (err.response) {
throw new Error( const desc = `HTTP error ${err.response.status}: ${JSON.stringify(err.response.data)}`
`HTTP error ${err.response.status}: ${JSON.stringify(err.response.data)}` logger.error(desc)
); throw new Error(desc);
} }
throw err; throw err;
} }
@@ -38,7 +60,9 @@ export async function queryScraper(query: string): Promise<string[]> {
const data = response.data; const data = response.data;
if (data?.status !== "ok") { if (data?.status !== "ok") {
throw new Error(`API returned status: ${data?.status}`); const desc = `API returned status: ${data?.status}`;
logger.error(desc)
throw new Error(desc);
} }
// TEMP?: Convert API results to array of formatted strings. // TEMP?: Convert API results to array of formatted strings.
@@ -47,7 +71,7 @@ export async function queryScraper(query: string): Promise<string[]> {
const lines: string[] = context.map((item: any) => { const lines: string[] = context.map((item: any) => {
if (checkDisinfo(item.url)) { if (checkDisinfo(item.url)) {
writeToJSONL("blocked.jsonl", {url: item.url, query: query}) writeToJSONL("blocked.jsonl", { url: item.url, query: query })
return ""; return "";
} }
+20 -5
View File
@@ -1,16 +1,31 @@
import winston from "winston"; import winston from "winston";
const { combine, splat, colorize, timestamp, printf, errors, json } = winston.format;

// Human-readable, colourised single-line output for the console transport.
const consoleFormat = combine(
    splat(),
    colorize(),
    timestamp({ format: "HH:mm:ss" }),
    printf((info) => `${info.timestamp} ${info.level}: ${info.message}`)
);

// Structured JSON output (with stack traces for Error objects) for the file transport.
const fileFormat = combine(
    splat(),
    timestamp(),
    errors({ stack: true }),
    json()
);

// Console transport: receives every entry at the logger's level.
const consoleTransport = new winston.transports.Console({ format: consoleFormat });

// File transport: only entries at "error" level are written to the log file.
const errorFileTransport = new winston.transports.File({
    filename: "../data/error.jsonl",
    level: "error",
    format: fileFormat,
});

/**
 * Shared application logger.
 *
 * Logs at "info" level and above to the console, and additionally writes
 * "error" level entries as JSON to `../data/error.jsonl`.
 */
export const logger = winston.createLogger({
    level: "info",
    transports: [consoleTransport, errorFileTransport],
});