Add file logging for errors. Add exponential backoff retry to web search. On a failed web search, do not crash the pipeline; return placeholder text to the language model instead.
This commit is contained in:
@@ -12,6 +12,7 @@ Final Dissertation Submission Repository
|
||||
├── run.sh # Bash script to run project elements from one place
|
||||
├── data/ # Holder for project data
|
||||
| ├── blocked.jsonl # Web search results blocked by the Iffy list
|
||||
| ├── error.jsonl # Log file containing critical exceptions (JSON lines written by the logger's file transport)
|
||||
| ├── claims.json # Retrieved claims from dbkf fetcher
|
||||
| ├── dev-eng.csv
|
||||
| ├── train-eng.csv # Normalized disinformation claims in CSV format from CLAN
|
||||
|
||||
Generated
+7
@@ -18,6 +18,7 @@
|
||||
"compute-cosine-similarity": "^1.1.0",
|
||||
"csv-parse": "^6.1.0",
|
||||
"dotenv": "^17.2.3",
|
||||
"exponential-backoff": "^3.1.3",
|
||||
"fs": "^0.0.1-security",
|
||||
"langchain": "^1.2.14",
|
||||
"selenium-webdriver": "^4.40.0",
|
||||
@@ -1766,6 +1767,12 @@
|
||||
"integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/exponential-backoff": {
|
||||
"version": "3.1.3",
|
||||
"resolved": "https://registry.npmjs.org/exponential-backoff/-/exponential-backoff-3.1.3.tgz",
|
||||
"integrity": "sha512-ZgEeZXj30q+I0EN+CbSSpIyPaJ5HVQD18Z1m+u1FXbAeT94mr1zw50q4q6jiiC447Nl/YTcIYSAftiGqetwXCA==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/fecha": {
|
||||
"version": "4.2.3",
|
||||
"resolved": "https://registry.npmjs.org/fecha/-/fecha-4.2.3.tgz",
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
"compute-cosine-similarity": "^1.1.0",
|
||||
"csv-parse": "^6.1.0",
|
||||
"dotenv": "^17.2.3",
|
||||
"exponential-backoff": "^3.1.3",
|
||||
"fs": "^0.0.1-security",
|
||||
"langchain": "^1.2.14",
|
||||
"selenium-webdriver": "^4.40.0",
|
||||
|
||||
@@ -1,8 +1,30 @@
|
||||
import axios from "axios";
|
||||
import { checkDisinfo } from "./checkDisinfo";
|
||||
import { writeToJSONL } from "../utils/writeToJSONL";
|
||||
import { backOff } from "exponential-backoff";
|
||||
import { logger } from "../utils/logger";
|
||||
|
||||
/**
 * Runs a web search for `query`, retrying failed attempts with exponential backoff.
 *
 * The actual request is delegated to `queryScraperWorker`. Any error it throws
 * triggers another attempt, up to 10 attempts in total. The delay starts at
 * 500 ms, doubles after each attempt, is capped at 50 000 ms, and is randomized
 * with full jitter.
 *
 * A failed search must not crash the surrounding pipeline, so once every attempt
 * has failed the last error is logged and a placeholder result is returned
 * instead of rejecting.
 *
 * @param query - Search query forwarded unchanged to the worker.
 * @returns The worker's result lines, or `["API EXCEPTION"]` when all attempts failed.
 */
export async function queryScraper(query: string): Promise<string[]> {
  try {
    return await backOff(() => queryScraperWorker(query), {
      numOfAttempts: 10,
      startingDelay: 500,
      timeMultiple: 2,
      jitter: "full",
      maxDelay: 50000,
    });
  } catch (err: unknown) {
    // Include the cause of the final failure: not every error path in the worker
    // logs before throwing, so without this the log would not say why we gave up.
    const reason = err instanceof Error ? err.message : String(err);
    logger.error(
      `Failed out of retry loop, returning placeholder to pipeline: ${reason}`
    );
    return ["API EXCEPTION"];
  }
}
|
||||
|
||||
async function queryScraperWorker(query: string): Promise<string[]> {
|
||||
const instance = process.env.SCRAPER_INSTANCE;
|
||||
if (!instance) {
|
||||
throw new Error("SCRAPER_INSTANCE environment variable is not set");
|
||||
@@ -28,9 +50,9 @@ export async function queryScraper(query: string): Promise<string[]> {
|
||||
response = await axios.get(url, { params });
|
||||
} catch (err: any) {
|
||||
if (err.response) {
|
||||
throw new Error(
|
||||
`HTTP error ${err.response.status}: ${JSON.stringify(err.response.data)}`
|
||||
);
|
||||
const desc = `HTTP error ${err.response.status}: ${JSON.stringify(err.response.data)}`
|
||||
logger.error(desc)
|
||||
throw new Error(desc);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
@@ -38,7 +60,9 @@ export async function queryScraper(query: string): Promise<string[]> {
|
||||
const data = response.data;
|
||||
|
||||
if (data?.status !== "ok") {
|
||||
throw new Error(`API returned status: ${data?.status}`);
|
||||
const desc = `API returned status: ${data?.status}`;
|
||||
logger.error(desc)
|
||||
throw new Error(desc);
|
||||
}
|
||||
|
||||
// TEMP?: Convert API results to array of formatted strings.
|
||||
@@ -47,7 +71,7 @@ export async function queryScraper(query: string): Promise<string[]> {
|
||||
|
||||
const lines: string[] = context.map((item: any) => {
|
||||
if (checkDisinfo(item.url)) {
|
||||
writeToJSONL("blocked.jsonl", {url: item.url, query: query})
|
||||
writeToJSONL("blocked.jsonl", { url: item.url, query: query })
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
+24
-9
@@ -1,16 +1,31 @@
|
||||
import winston from "winston";
|
||||
|
||||
// Pull the individual format factories off winston.format once so the two
// pipelines below read as plain lists of steps.
const { combine, splat, colorize, timestamp, errors, json, printf } =
  winston.format;

/**
 * Human-readable console output: `HH:mm:ss <colorized level>: <message>`.
 * `splat()` runs first so printf-style placeholders in the message are
 * interpolated before the line is rendered.
 */
const consoleFormat = combine(
  splat(),
  colorize(),
  timestamp({ format: "HH:mm:ss" }),
  printf((info) => `${info.timestamp} ${info.level}: ${info.message}`)
);

/**
 * Machine-readable file output: one JSON object per entry, carrying a
 * timestamp and, via `errors({ stack: true })`, the stack trace of logged errors.
 */
const fileFormat = combine(
  splat(),
  timestamp(),
  errors({ stack: true }),
  json()
);
|
||||
|
||||
export const logger = winston.createLogger({
|
||||
level: "info",
|
||||
format: winston.format.combine(
|
||||
winston.format.splat(),
|
||||
winston.format.colorize(),
|
||||
winston.format.timestamp({ format: "HH:mm:ss" }),
|
||||
winston.format.printf(({ level, message, timestamp }) => {
|
||||
return `${timestamp} ${level}: ${message}`;
|
||||
})
|
||||
),
|
||||
transports: [
|
||||
new winston.transports.Console(),
|
||||
new winston.transports.Console({
|
||||
format: consoleFormat,
|
||||
}),
|
||||
new winston.transports.File({
|
||||
filename: "../data/error.jsonl",
|
||||
level: "error",
|
||||
format: fileFormat,
|
||||
}),
|
||||
],
|
||||
});
|
||||
Reference in New Issue
Block a user