diff --git a/src/crawler.ts b/src/crawler.ts index 91a7d80c7..a0f8e9b68 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -33,7 +33,7 @@ import { Screenshots } from "./util/screenshots.js"; import { parseArgs } from "./util/argParser.js"; import { initRedis } from "./util/redis.js"; import { logger, formatErr } from "./util/logger.js"; -import { WorkerOpts, WorkerState, runWorkers } from "./util/worker.js"; +import { WorkerState, runWorkers } from "./util/worker.js"; import { sleep, timedRun, secondsElapsed } from "./util/timing.js"; import { collectAllFileSources } from "./util/file_reader.js"; @@ -869,7 +869,7 @@ self.__bx_behaviors.selectMainBehavior(); await this.checkLimits(); } - async teardownPage({ workerid }: WorkerOpts) { + async teardownPage({ workerid }: WorkerState) { if (this.screencaster) { await this.screencaster.stopById(workerid); } diff --git a/src/util/state.ts b/src/util/state.ts index e084992f3..c75e9487c 100644 --- a/src/util/state.ts +++ b/src/util/state.ts @@ -1,4 +1,5 @@ import { Redis, Result, Callback } from "ioredis"; +import { v4 as uuidv4 } from "uuid"; import { logger } from "./logger.js"; @@ -50,7 +51,7 @@ export class PageState { workerid!: WorkerId; - pageid?: string; + pageid: string; title?: string; mime?: string; ts?: Date; @@ -72,6 +73,7 @@ export class PageState { this.seedId = redisData.seedId; this.depth = redisData.depth; this.extraHops = redisData.extraHops || 0; + this.pageid = uuidv4(); this.status = 0; } } diff --git a/src/util/worker.ts b/src/util/worker.ts index e05dad308..e8b21247f 100644 --- a/src/util/worker.ts +++ b/src/util/worker.ts @@ -1,7 +1,5 @@ import os from "os"; -import { v4 as uuidv4 } from "uuid"; - import { logger, formatErr } from "./logger.js"; import { sleep, timedRun } from "./timing.js"; import { Recorder } from "./recorder.js"; @@ -54,9 +52,7 @@ export async function runWorkers( } // =========================================================================== -// TODO: Fix this the next time the file is edited. -// eslint-disable-next-line @typescript-eslint/no-explicit-any -export type WorkerOpts = Record & { +type WorkerOpts = { page: Page; cdp: CDPSession; workerid: WorkerId; @@ -306,16 +302,12 @@ export class PageWorker { async timedCrawlPage(opts: WorkerState) { const workerid = this.id; const { data } = opts; - const { url } = data; + const { url, pageid } = data; logger.info("Starting page", { workerid, page: url }, "worker"); this.logDetails = { page: url, workerid }; - // set new page id - const pageid = uuidv4(); - data.pageid = pageid; - if (this.recorder) { this.recorder.startPage({ pageid, url }); }