Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better tracking of failed requests + logging context exclude #485

Merged
merged 5 commits into from Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "browsertrix-crawler",
"version": "1.0.0-beta.6",
"version": "1.0.0-beta.7",
"main": "browsertrix-crawler",
"type": "module",
"repository": "https://github.com/webrecorder/browsertrix-crawler",
Expand Down
3 changes: 2 additions & 1 deletion src/crawler.ts
Expand Up @@ -189,7 +189,8 @@ export class Crawler {
const debugLogging = this.params.logging.includes("debug");
logger.setDebugLogging(debugLogging);
logger.setLogLevel(this.params.logLevel);
logger.setContext(this.params.context);
logger.setContext(this.params.logContext);
logger.setExcludeContext(this.params.logExcludeContext);

// if the crawler is restarted automatically on an error exit code,
// exit with 0 from fatal by default, to avoid an unnecessary restart
Expand Down
14 changes: 13 additions & 1 deletion src/util/argParser.ts
Expand Up @@ -15,7 +15,11 @@ import {
import { ScopedSeed } from "./seeds.js";
import { interpolateFilename } from "./storage.js";
import { screenshotTypes } from "./screenshots.js";
import { LOG_CONTEXT_TYPES, logger } from "./logger.js";
import {
DEFAULT_EXCLUDE_LOG_CONTEXTS,
LOG_CONTEXT_TYPES,
logger,
} from "./logger.js";

// ============================================================================
class ArgParser {
Expand Down Expand Up @@ -225,6 +229,14 @@ class ArgParser {
coerce,
},

logExcludeContext: {
describe: "Comma-separated list of contexts to NOT include in logs",
type: "array",
default: DEFAULT_EXCLUDE_LOG_CONTEXTS,
choices: LOG_CONTEXT_TYPES,
coerce,
},

text: {
describe:
"Extract initial (default) or final text to pages.jsonl or WARC resource record(s)",
Expand Down
24 changes: 21 additions & 3 deletions src/util/logger.ts
Expand Up @@ -26,6 +26,7 @@ export const LOG_CONTEXT_TYPES = [
"general",
"worker",
"recorder",
"recorderNetwork",
"writer",
"state",
"redis",
Expand All @@ -51,13 +52,20 @@ export const LOG_CONTEXT_TYPES = [

export type LogContext = (typeof LOG_CONTEXT_TYPES)[number];

/**
 * Log contexts that are left out of the log output by default.
 * Serves as the default value for the `logExcludeContext` option.
 */
// prettier-ignore
export const DEFAULT_EXCLUDE_LOG_CONTEXTS: LogContext[] = ["recorderNetwork", "jsError", "screencast"];

// ===========================================================================
class Logger {
logStream: Writable | null = null;
debugLogging = false;
logErrorsToRedis = false;
logLevels: string[] = [];
contexts: string[] = [];
contexts: LogContext[] = [];
excludeContexts: LogContext[] = [];
crawlState?: RedisCrawlState | null = null;
fatalExitCode = 17;

Expand All @@ -81,18 +89,22 @@ class Logger {
this.logLevels = logLevels;
}

setContext(contexts: string[]) {
setContext(contexts: LogContext[]) {
this.contexts = contexts;
}

/**
 * Replaces the list of excluded log contexts.
 *
 * @param contexts - contexts whose messages should be dropped; `logAsJSON`
 *   returns without logging when its context is found in this list.
 */
setExcludeContext(contexts: LogContext[]): void {
  this.excludeContexts = contexts;
}

/**
 * Stores the given crawl state on this logger instance.
 *
 * @param crawlState - the Redis-backed crawl state to keep a reference to.
 */
setCrawlState(crawlState: RedisCrawlState): void {
  this.crawlState = crawlState;
}

logAsJSON(
message: string,
dataUnknown: unknown,
context: string,
context: LogContext,
logLevel = "info",
) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
Expand All @@ -110,6 +122,12 @@ class Logger {
}
}

if (this.excludeContexts.length) {
if (this.excludeContexts.indexOf(context) >= 0) {
return;
}
}

const dataToLog = {
timestamp: new Date().toISOString(),
logLevel: logLevel,
Expand Down