Skip to content

Commit

Permalink
profiles: handle terminate signals directly (#500)
Browse files Browse the repository at this point in the history
- add our own signal handling to create-login-profile to ensure fast
exit in k8s
- print crawler version info string on startup
  • Loading branch information
ikreymer committed Mar 18, 2024
1 parent 4d64eed commit 5060e6b
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 19 deletions.
20 changes: 2 additions & 18 deletions src/crawler.ts
Expand Up @@ -35,7 +35,7 @@ import { initRedis } from "./util/redis.js";
import { logger, formatErr } from "./util/logger.js";
import { WorkerOpts, WorkerState, runWorkers } from "./util/worker.js";
import { sleep, timedRun, secondsElapsed } from "./util/timing.js";
import { collectAllFileSources } from "./util/file_reader.js";
import { collectAllFileSources, getInfoString } from "./util/file_reader.js";

import { Browser } from "./util/browser.js";

Expand Down Expand Up @@ -428,7 +428,7 @@ export class Crawler {
this.logFH = fs.createWriteStream(this.logFilename);
logger.setExternalLogStream(this.logFH);

this.infoString = await this.getInfoString();
this.infoString = await getInfoString();
logger.info(this.infoString);

logger.info("Seeds", this.params.scopedSeeds);
Expand Down Expand Up @@ -1008,22 +1008,6 @@ self.__bx_behaviors.selectMainBehavior();
return res ? frame : null;
}

/**
 * Build the one-line version banner for the crawler.
 *
 * Reads this package's `package.json` and the installed warcio
 * `package.json` (both resolved relative to this module's URL) and
 * interpolates their `version` fields into the banner string.
 */
async getInfoString() {
  // Crawler's own package manifest.
  const crawlerPkgUrl = new URL("../package.json", import.meta.url);
  const crawlerPkgText = await fsp.readFile(crawlerPkgUrl, "utf-8");
  const { version: crawlerVersion } = JSON.parse(crawlerPkgText);

  // Manifest of the bundled warcio dependency.
  const warcioPkgUrl = new URL(
    "../node_modules/warcio/package.json",
    import.meta.url,
  );
  const warcioPkgText = await fsp.readFile(warcioPkgUrl, "utf-8");
  const { version: warcioVersion } = JSON.parse(warcioPkgText);

  return `Browsertrix-Crawler ${crawlerVersion} (with warcio.js ${warcioVersion})`;
}

async createWARCInfo(filename: string) {
const warcVersion = "WARC/1.0";
const type = "warcinfo";
Expand Down
14 changes: 13 additions & 1 deletion src/create-login-profile.ts
Expand Up @@ -14,6 +14,7 @@ import { logger } from "./util/logger.js";
import { Browser } from "./util/browser.js";
import { initStorage } from "./util/storage.js";
import { CDPSession, Page, PuppeteerLifeCycleEvent } from "puppeteer-core";
import { getInfoString } from "./util/file_reader.js";

const profileHTML = fs.readFileSync(
new URL("../html/createProfile.html", import.meta.url),
Expand Down Expand Up @@ -118,6 +119,11 @@ function getDefaultWindowSize() {
return `${x},${y}`;
}

/**
 * Termination-signal handler: logs which signal was received and then
 * ends the process immediately with exit code 1.
 *
 * @param signame - Name of the signal being handled, used only in the log line.
 */
function handleTerminate(signame: string) {
  const message = `Got signal ${signame}, exiting`;
  logger.info(message);

  // Exit right away rather than waiting on any pending work.
  process.exit(1);
}

async function main() {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const params: any = yargs(process.argv)
Expand All @@ -126,6 +132,12 @@ async function main() {

logger.setDebugLogging(true);

logger.info(await getInfoString());

process.on("SIGINT", () => handleTerminate("SIGINT"));

process.on("SIGTERM", () => handleTerminate("SIGTERM"));

if (!params.headless) {
logger.debug("Launching XVFB");
child_process.spawn("Xvfb", [
Expand Down Expand Up @@ -164,7 +176,7 @@ async function main() {
await browser.launch({
profileUrl: params.profile,
headless: params.headless,
signals: true,
signals: false,
chromeOptions: {
proxy: false,
extraArgs: [
Expand Down
17 changes: 17 additions & 0 deletions src/util/file_reader.ts
@@ -1,4 +1,5 @@
import fs from "fs";
import fsp from "fs/promises";
import path from "path";

const MAX_DEPTH = 2;
Expand Down Expand Up @@ -48,3 +49,19 @@ export function collectAllFileSources(

return [];
}

/**
 * Read a `package.json` file and return its `version` field.
 *
 * @param relPath - Path to the manifest, resolved relative to this module's URL.
 * @returns The parsed `version` value, or `undefined` if the field is absent.
 * @throws If the file cannot be read or does not contain valid JSON.
 */
async function readPackageVersion(
  relPath: string,
): Promise<string | undefined> {
  const raw = await fsp.readFile(new URL(relPath, import.meta.url), {
    encoding: "utf-8",
  });
  const pkg = JSON.parse(raw) as { version?: string };
  return pkg.version;
}

/**
 * Build the one-line version banner, e.g.
 * `Browsertrix-Crawler <version> (with warcio.js <version>)`.
 *
 * The two manifests are independent, so they are read concurrently.
 *
 * @returns The banner string containing both version numbers.
 * @throws If either `package.json` cannot be read or parsed.
 */
export async function getInfoString(): Promise<string> {
  const [crawlerVersion, warcioVersion] = await Promise.all([
    readPackageVersion("../../package.json"),
    readPackageVersion("../../node_modules/warcio/package.json"),
  ]);

  return `Browsertrix-Crawler ${crawlerVersion} (with warcio.js ${warcioVersion})`;
}

0 comments on commit 5060e6b

Please sign in to comment.