Add offset and limit in preparation for the large dataset
This commit is contained in:
+1
-2
@@ -29,5 +29,4 @@ Experiments with different model types:
|
|||||||
| gpt-5.4-mini | 29/278 | 10.43 |
|
| gpt-5.4-mini | 29/278 | 10.43 |
|
||||||
| gpt-5.4-nano | 6/210 | 2.85 |
|
| gpt-5.4-nano | 6/210 | 2.85 |
|
||||||
| gpt-4.1-mini | 15/269 | 5.57 |
|
| gpt-4.1-mini | 15/269 | 5.57 |
|
||||||
| gpt-4o-mini | 27/287 | 9.407 |
|
| gpt-4o-mini | 27/287 | 9.407 |
|
||||||
| llama3.1:8b-instruct-q4_K_M | ? | ? |
|
|
||||||
@@ -19,6 +19,9 @@ const MODE = process.env.MODE ?? "claim";
|
|||||||
|
|
||||||
const MAX_CONCURRENCY = 5;
|
const MAX_CONCURRENCY = 5;
|
||||||
|
|
||||||
|
const OFFSET = parseInt(process.env.OFFSET ?? "0", 10);
|
||||||
|
const LIMIT = process.env.LIMIT ? parseInt(process.env.LIMIT, 10) : null;
|
||||||
|
|
||||||
const client = new Client({ apiUrl: API_URL });
|
const client = new Client({ apiUrl: API_URL });
|
||||||
|
|
||||||
|
|
||||||
@@ -164,10 +167,19 @@ async function processRecord(record: any): Promise<ResultRecord> {
|
|||||||
async function main() {
|
async function main() {
|
||||||
console.log("Reading input file...");
|
console.log("Reading input file...");
|
||||||
|
|
||||||
const records = await loadInputs();
|
const allRecords = await loadInputs();
|
||||||
|
|
||||||
console.log(`Loaded ${records.length} records`);
|
console.log(`Loaded ${allRecords.length} records`);
|
||||||
|
|
||||||
|
const records = allRecords.slice(
|
||||||
|
OFFSET,
|
||||||
|
LIMIT !== null ? OFFSET + LIMIT : undefined
|
||||||
|
);
|
||||||
|
|
||||||
|
console.log(
|
||||||
|
`Processing ${records.length} records (offset=${OFFSET}, limit=${LIMIT ?? "∞"})`
|
||||||
|
);
|
||||||
|
|
||||||
fs.writeFileSync(OUTPUT_FILE, "", { flag: "a" });
|
fs.writeFileSync(OUTPUT_FILE, "", { flag: "a" });
|
||||||
|
|
||||||
const limit = pLimit(MAX_CONCURRENCY);
|
const limit = pLimit(MAX_CONCURRENCY);
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ DEFAULT_PARAMS = [
|
|||||||
("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
|
("organization", "http://weverify.eu/resource/Organization/3727f7b2aa90ec0716693e5464b28d18"), # StopFake
|
||||||
]
|
]
|
||||||
|
|
||||||
NUM_RANDOM_CLAIMS = 200
|
NUM_RANDOM_CLAIMS = 2000
|
||||||
|
|
||||||
INPUT_FILE = "../../data/input.jsonl"
|
INPUT_FILE = "../../data/input.jsonl"
|
||||||
OUTPUT_FILE = "../../data/claims.json"
|
OUTPUT_FILE = "../../data/claims.json"
|
||||||
|
|||||||
Reference in New Issue
Block a user