Skip to content

Commit

Permalink
avoid cloudflare detection of puppeteer when using browser profiles: (#…
Browse files Browse the repository at this point in the history
…518)

- filter out 'other' / no url targets from puppeteer attachment
- no longer pass the '--disable-site-isolation-trials' flag when using profiles (it is now only added when recording)
- workaround for #446 with profiles
- also fixes `pageExtraDelay` not being applied for non-200 responses, which may be
useful for detecting captcha-blocked pages.
- connect VNC right away instead of waiting for page to fully finish
loading, hopefully resulting in faster profile start-up time.
  • Loading branch information
ikreymer committed Mar 28, 2024
1 parent 0d973d6 commit ea098b6
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 11 deletions.
6 changes: 1 addition & 5 deletions src/crawler.ts
Expand Up @@ -891,11 +891,7 @@ self.__bx_behaviors.selectMainBehavior();

data.loadState = LoadState.EXTRACTION_DONE;

if (data.status >= 400) {
return;
}

if (this.params.behaviorOpts) {
if (this.params.behaviorOpts && data.status < 400) {
if (!data.isHTMLPage) {
logger.debug(
"Skipping behaviors for non-HTML page",
Expand Down
17 changes: 12 additions & 5 deletions src/create-login-profile.ts
Expand Up @@ -223,15 +223,11 @@ async function main() {
);
}

logger.info(`Loading page: ${params.url}`);

await page.goto(params.url, { waitUntil });

if (!params.automated) {
const target = await cdp.send("Target.getTargetInfo");
const targetId = target.targetInfo.targetId;

new InteractiveBrowser(params, browser, page, cdp, targetId);
new InteractiveBrowser(params, browser, page, cdp, targetId, waitUntil);
} else {
await automatedProfile(params, browser, page, cdp, waitUntil);
}
Expand All @@ -248,6 +244,10 @@ async function automatedProfile(
) {
let u, p;

logger.info(`Loading page: ${params.url}`);

await page.goto(params.url, { waitUntil });

logger.debug("Looking for username and password entry fields on page...");

try {
Expand Down Expand Up @@ -372,6 +372,7 @@ class InteractiveBrowser {
page: Page,
cdp: CDPSession,
targetId: string,
waitUntil: PuppeteerLifeCycleEvent = "load",
) {
logger.info("Creating Profile Interactively...");
child_process.spawn("socat", [
Expand Down Expand Up @@ -427,6 +428,12 @@ class InteractiveBrowser {
} else {
logger.info("Screencasting with CDP on port 9222");
}

logger.info(`Loading page: ${params.url}`);

page.goto(params.url, { waitUntil, timeout: 0 }).finally(() => {
logger.info("Loaded!");
});
}

handlePageLoad() {
Expand Down
18 changes: 17 additions & 1 deletion src/util/browser.ts
Expand Up @@ -85,6 +85,10 @@ export class Browser {

const args = this.chromeArgs(chromeOptions);

if (recording) {
args.push("--disable-site-isolation-trials");
}

let defaultViewport = null;

if (process.env.GEOMETRY) {
Expand All @@ -107,11 +111,24 @@ export class Browser {
defaultViewport,
waitForInitialPage: false,
userDataDir: this.profileDir,
targetFilter: recording
? undefined
: (target) => this.targetFilter(target),
};

await this._init(launchOpts, ondisconnect, recording);
}

/**
 * Decides whether a browser target should be attached to.
 *
 * A target is rejected only when it has an empty URL and its type is
 * "other"; every other target is accepted. The decision is logged at
 * debug level together with the target's URL and type.
 *
 * @param target - the browser target being considered
 * @returns `true` to attach to the target, `false` to skip it
 */
targetFilter(target: Target) {
  const url = target.url();
  const type = target.type();

  // Equivalent to: not (no URL and type "other").
  const attach = Boolean(url) || type !== "other";

  logger.debug("Target Filter", { url, type, attach }, "browser");

  return attach;
}

async setupPage({ page }: { page: Page; cdp: CDPSession }) {
await this.addInitScript(
page,
Expand Down Expand Up @@ -215,7 +232,6 @@ export class Browser {
"--remote-debugging-port=9221",
"--remote-allow-origins=*",
"--autoplay-policy=no-user-gesture-required",
"--disable-site-isolation-trials",
`--user-agent=${userAgent || this.getDefaultUA()}`,
...extraArgs,
];
Expand Down

0 comments on commit ea098b6

Please sign in to comment.