Files
LLMsForDisinformationAnalysis/agent/tools/webpageFetch.ts
T

29 lines
908 B
TypeScript

import { Builder, Browser } from "selenium-webdriver";
import firefox from "selenium-webdriver/firefox";
async function extractWebpageContent(url: string) : Promise<string>{
const options = new firefox.Options();
options.addArguments("--headless");
let driver = await new Builder().forBrowser(Browser.FIREFOX).setFirefoxOptions(options).build()
try {
await driver.get(url)
await driver.wait(async () => {
return await driver.executeScript(
"return document.readyState === 'complete'"
);
}, 5000);
const readableText = await driver.executeScript(
"return document.body.innerText;"
) as string;
return readableText
} finally {
await driver.quit()
}
}
//TODO: Extract, rank snippets
//console.log(await extractWebpageContent("https://www.bbc.co.uk/news/live/c74wd01egvyt"))