From 250d49d9350d2a1a4cea4115cc777112efdfae30 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 26 Mar 2024 11:19:42 -0700 Subject: [PATCH] avoid cloudflare detection of puppeteer when using browser profiles: - filter out 'other' targets from puppeteer attachment - omit '--disable-site-isolation-trials' for profiles (the flag is now passed only when recording) - workaround for #446 with profiles --- src/crawler.ts | 6 +----- src/create-login-profile.ts | 17 ++++++++++++----- src/util/browser.ts | 18 +++++++++++++++++- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/src/crawler.ts b/src/crawler.ts index 7e861c8cd..2972fa714 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -891,11 +891,7 @@ self.__bx_behaviors.selectMainBehavior(); data.loadState = LoadState.EXTRACTION_DONE; - if (data.status >= 400) { - return; - } - - if (this.params.behaviorOpts) { + if (this.params.behaviorOpts && data.status < 400) { if (!data.isHTMLPage) { logger.debug( "Skipping behaviors for non-HTML page", diff --git a/src/create-login-profile.ts b/src/create-login-profile.ts index 1fb110469..a157a2f95 100755 --- a/src/create-login-profile.ts +++ b/src/create-login-profile.ts @@ -223,15 +223,11 @@ async function main() { ); } - logger.info(`Loading page: ${params.url}`); - - await page.goto(params.url, { waitUntil }); - if (!params.automated) { const target = await cdp.send("Target.getTargetInfo"); const targetId = target.targetInfo.targetId; - new InteractiveBrowser(params, browser, page, cdp, targetId); + new InteractiveBrowser(params, browser, page, cdp, targetId, waitUntil); } else { await automatedProfile(params, browser, page, cdp, waitUntil); } @@ -248,6 +244,10 @@ async function automatedProfile( ) { let u, p; + logger.info(`Loading page: ${params.url}`); + + await page.goto(params.url, { waitUntil }); + logger.debug("Looking for username and password entry fields on page..."); try { @@ -372,6 +372,7 @@ class InteractiveBrowser { page: Page, cdp: CDPSession, targetId: string, + waitUntil: 
PuppeteerLifeCycleEvent = "load", ) { logger.info("Creating Profile Interactively..."); child_process.spawn("socat", [ @@ -427,6 +428,12 @@ class InteractiveBrowser { } else { logger.info("Screencasting with CDP on port 9222"); } + + logger.info(`Loading page: ${params.url}`); + + page.goto(params.url, { waitUntil, timeout: 0 }).finally(() => { + logger.info("Loaded!"); + }); } handlePageLoad() { diff --git a/src/util/browser.ts b/src/util/browser.ts index b99dfee33..71f215124 100644 --- a/src/util/browser.ts +++ b/src/util/browser.ts @@ -85,6 +85,10 @@ export class Browser { const args = this.chromeArgs(chromeOptions); + if (recording) { + args.push("--disable-site-isolation-trials"); + } + let defaultViewport = null; if (process.env.GEOMETRY) { @@ -107,11 +111,24 @@ export class Browser { defaultViewport, waitForInitialPage: false, userDataDir: this.profileDir, + targetFilter: recording + ? undefined + : (target) => this.targetFilter(target), }; await this._init(launchOpts, ondisconnect, recording); } + targetFilter(target: Target) { + const attach = !(!target.url() && target.type() === "other"); + logger.debug( + "Target Filter", + { url: target.url(), type: target.type(), attach }, + "browser", + ); + return attach; + } + async setupPage({ page }: { page: Page; cdp: CDPSession }) { await this.addInitScript( page, @@ -215,7 +232,6 @@ export class Browser { "--remote-debugging-port=9221", "--remote-allow-origins=*", "--autoplay-policy=no-user-gesture-required", - "--disable-site-isolation-trials", `--user-agent=${userAgent || this.getDefaultUA()}`, ...extraArgs, ];