From 8317fd85df87cbdb242aa6f6f8770415e30a9ef3 Mon Sep 17 00:00:00 2001 From: William Jeynes Date: Tue, 24 Feb 2026 13:05:35 +0000 Subject: [PATCH] Add file logging for errors. Add exponential backoff retry to web search. On failed web search, do not crash pipeline, return placeholder text to language model --- README.md | 1 + agent/package-lock.json | 7 +++++++ agent/package.json | 1 + agent/tools/webSearch.ts | 34 +++++++++++++++++++++++++++++----- agent/utils/logger.ts | 33 ++++++++++++++++++++++++--------- 5 files changed, 62 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index f9f8f3c..5836923 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ Final Dissertation Submission Repository ├── run.sh # Bash script to run project elements from one place ├── data/ # Holder from project data | ├── blocked.jsonl # Web search results blocked by the Iffy list +| ├── error.jsonl # Log file containing critical exceptions | ├── claims.json # Retreived claims from dbkf fetcher | ├── dev-eng.csv | ├── train-eng.csv # Normalized disinformation claims in CSV format from CLAN diff --git a/agent/package-lock.json b/agent/package-lock.json index 7f344b3..08c5227 100644 --- a/agent/package-lock.json +++ b/agent/package-lock.json @@ -18,6 +18,7 @@ "compute-cosine-similarity": "^1.1.0", "csv-parse": "^6.1.0", "dotenv": "^17.2.3", + "exponential-backoff": "^3.1.3", "fs": "^0.0.1-security", "langchain": "^1.2.14", "selenium-webdriver": "^4.40.0", @@ -1766,6 +1767,12 @@ "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", "license": "MIT" }, + "node_modules/exponential-backoff": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/exponential-backoff/-/exponential-backoff-3.1.3.tgz", + "integrity": "sha512-ZgEeZXj30q+I0EN+CbSSpIyPaJ5HVQD18Z1m+u1FXbAeT94mr1zw50q4q6jiiC447Nl/YTcIYSAftiGqetwXCA==", + "license": "Apache-2.0" + }, "node_modules/fecha": { "version": "4.2.3", "resolved": 
"https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz", diff --git a/agent/package.json b/agent/package.json index f82328f..3355099 100644 --- a/agent/package.json +++ b/agent/package.json @@ -22,6 +22,7 @@ "compute-cosine-similarity": "^1.1.0", "csv-parse": "^6.1.0", "dotenv": "^17.2.3", + "exponential-backoff": "^3.1.3", "fs": "^0.0.1-security", "langchain": "^1.2.14", "selenium-webdriver": "^4.40.0", diff --git a/agent/tools/webSearch.ts b/agent/tools/webSearch.ts index 54aeda0..073509b 100644 --- a/agent/tools/webSearch.ts +++ b/agent/tools/webSearch.ts @@ -1,8 +1,30 @@ import axios from "axios"; import { checkDisinfo } from "./checkDisinfo"; import { writeToJSONL } from "../utils/writeToJSONL"; +import { backOff } from "exponential-backoff"; +import { logger } from "../utils/logger"; export async function queryScraper(query: string): Promise { + try { + const response = await backOff(async () => { + return await queryScraperWorker(query); + }, { + numOfAttempts: 10, + startingDelay: 500, + timeMultiple: 2, + jitter: "full", + maxDelay: 50000, + }) + + return response; + } + catch (err: any) { + logger.error("Failed out of retry loop, returning placeholder to pipeline") + return ["API EXCEPTION"] + } +} + +async function queryScraperWorker(query: string): Promise { const instance = process.env.SCRAPER_INSTANCE; if (!instance) { throw new Error("SCRAPER_INSTANCE environment variable is not set"); @@ -28,9 +50,9 @@ export async function queryScraper(query: string): Promise { response = await axios.get(url, { params }); } catch (err: any) { if (err.response) { - throw new Error( - `HTTP error ${err.response.status}: ${JSON.stringify(err.response.data)}` - ); + const desc = `HTTP error ${err.response.status}: ${JSON.stringify(err.response.data)}` + logger.error(desc) + throw new Error(desc); } throw err; } @@ -38,7 +60,9 @@ export async function queryScraper(query: string): Promise { const data = response.data; if (data?.status !== "ok") { - throw new Error(`API 
returned status: ${data?.status}`); + const desc = `API returned status: ${data?.status}`; + logger.error(desc) + throw new Error(desc); } // TEMP?: Convert API results to array of formatted strings. @@ -47,7 +71,7 @@ export async function queryScraper(query: string): Promise { const lines: string[] = context.map((item: any) => { if (checkDisinfo(item.url)) { - writeToJSONL("blocked.jsonl", {url: item.url, query: query}) + writeToJSONL("blocked.jsonl", { url: item.url, query: query }) return ""; } diff --git a/agent/utils/logger.ts b/agent/utils/logger.ts index dcf02f2..9b161de 100644 --- a/agent/utils/logger.ts +++ b/agent/utils/logger.ts @@ -1,16 +1,31 @@ import winston from "winston"; +const consoleFormat = winston.format.combine( + winston.format.splat(), + winston.format.colorize(), + winston.format.timestamp({ format: "HH:mm:ss" }), + winston.format.printf(({ level, message, timestamp }) => { + return `${timestamp} ${level}: ${message}`; + }) +); + +const fileFormat = winston.format.combine( + winston.format.splat(), + winston.format.timestamp(), + winston.format.errors({ stack: true }), + winston.format.json() +); + export const logger = winston.createLogger({ level: "info", - format: winston.format.combine( - winston.format.splat(), - winston.format.colorize(), - winston.format.timestamp({ format: "HH:mm:ss" }), - winston.format.printf(({ level, message, timestamp }) => { - return `${timestamp} ${level}: ${message}`; - }) - ), transports: [ - new winston.transports.Console(), + new winston.transports.Console({ + format: consoleFormat, + }), + new winston.transports.File({ + filename: "../data/error.jsonl", + level: "error", + format: fileFormat, + }), ], }); \ No newline at end of file