Skip to content

Commit

Permalink
feat: add proxy rotation (settable per store) (#1026)
Browse files Browse the repository at this point in the history
  • Loading branch information
Doridian committed Dec 2, 2020
1 parent 65df944 commit 490d44e
Show file tree
Hide file tree
Showing 8 changed files with 498 additions and 49 deletions.
311 changes: 304 additions & 7 deletions package-lock.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions package.json
Expand Up @@ -45,6 +45,7 @@
"puppeteer-extra-plugin-adblocker": "^2.11.9",
"puppeteer-extra-plugin-block-resources": "^2.2.7",
"puppeteer-extra-plugin-stealth": "^2.6.5",
"puppeteer-page-proxy": "^1.2.8",
"pushover-notifications": "^1.2.2",
"twilio": "^3.52.0",
"twitch": "^4.3.2",
Expand Down
15 changes: 13 additions & 2 deletions src/adblocker.ts
Expand Up @@ -5,9 +5,20 @@ export const adBlocker = new PuppeteerExtraPluginAdblocker({
blockTrackers: true
});

export async function disableBlockerInPage(page: Page) {
/**
 * Turns the ad blocker on for a single page.
 *
 * The call is a no-op when blocking is already enabled for `page`, so it is
 * safe to repeat.
 *
 * @param page - Page in which request blocking should be active.
 */
export async function enableBlockerInPage(page: Page) {
  const blockerObject = await adBlocker.getBlocker();
  if (blockerObject.isBlockingEnabled(page)) {
    // Already active: leave the blocker untouched instead of toggling it off.
    return;
  }

  await blockerObject.enableBlockingInPage(page);
}

/**
 * Turns the ad blocker off for a single page.
 *
 * Nothing happens when blocking is not currently enabled for `page`.
 *
 * @param page - Page in which request blocking should be switched off.
 */
export async function disableBlockerInPage(page: Page) {
  const blocker = await adBlocker.getBlocker();
  const isActive = blocker.isBlockingEnabled(page);

  if (isActive) {
    await blocker.disableBlockingInPage(page);
  }
}
14 changes: 13 additions & 1 deletion src/config.ts
Expand Up @@ -2,6 +2,7 @@ import {banner} from './banner';

import {config as config_} from 'dotenv';
import path from 'path';
import {readFileSync} from 'fs';

config_({path: path.resolve(__dirname, '../.env')});

Expand Down Expand Up @@ -354,6 +355,16 @@ const store = {
]),
stores: envOrArray(process.env.STORES, ['nvidia']).map((entry) => {
const [name, minPageSleep, maxPageSleep] = entry.match(/[^:]+/g) ?? [];

let proxyList;
try {
proxyList = readFileSync(`${name}.proxies`)
.toString()
.trim()
.split('\n')
.map((x) => x.trim());
} catch {}

return {
maxPageSleep: envOrNumberMax(
minPageSleep,
Expand All @@ -365,7 +376,8 @@ const store = {
maxPageSleep,
browser.minSleep
),
name: envOrString(name)
name: envOrString(name),
proxyList
};
})
};
Expand Down
11 changes: 0 additions & 11 deletions src/index.ts
@@ -1,25 +1,14 @@
import {startAPIServer, stopAPIServer} from './web';
import {Browser} from 'puppeteer';
import {adBlocker} from './adblocker';
import {config} from './config';
import {getSleepTime} from './util';
import {logger} from './logger';
import puppeteer from 'puppeteer-extra';
import resourceBlock from 'puppeteer-extra-plugin-block-resources';
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
import {storeList} from './store/model';
import {tryLookupAndLoop} from './store';

puppeteer.use(stealthPlugin());
if (config.browser.lowBandwidth) {
puppeteer.use(
resourceBlock({
blockedTypes: new Set(['image', 'font'] as const)
})
);
} else {
puppeteer.use(adBlocker);
}

let browser: Browser | undefined;

Expand Down
189 changes: 162 additions & 27 deletions src/store/lookup.ts
@@ -1,4 +1,4 @@
import {Browser, Page, Response} from 'puppeteer';
import {Browser, Page, PageEventObj, Request, Response} from 'puppeteer';
import {Link, Store, getStores} from './model';
import {Print, logger} from '../logger';
import {Selector, cardPrice, pageIncludesLabels} from './includes-labels';
Expand All @@ -9,18 +9,109 @@ import {
getSleepTime,
isStatusCodeInRange
} from '../util';
import {disableBlockerInPage, enableBlockerInPage} from '../adblocker';
import {config} from '../config';
import {disableBlockerInPage} from '../adblocker';
import {fetchLinks} from './fetch-links';
import {filterStoreLink} from './filter';
import open from 'open';
import {processBackoffDelay} from './model/helpers/backoff';
import {sendNotification} from '../notification';
import useProxy from 'puppeteer-page-proxy';

const inStock: Record<string, boolean> = {};

const linkBuilderLastRunTimes: Record<string, number> = {};

/**
 * Advances the store's proxy rotation and returns the proxy to use next.
 *
 * The rotation starts at the first entry of `store.proxyList` and wraps back
 * to it after the last one. The position is kept on the store itself in
 * `store.currentProxyIndex`.
 *
 * @param store - Store whose proxy list is rotated; `currentProxyIndex` is
 * updated in place.
 * @returns The selected proxy, or `undefined` when the store has no proxy
 * list or the list is empty.
 */
function nextProxy(store: Store) {
  const proxies = store.proxyList;
  if (!proxies || proxies.length === 0) {
    return;
  }

  // First use starts at index 0; afterwards step forward and wrap around.
  // The modulo also pulls a stale index back into range if the list shrank.
  store.currentProxyIndex =
    store.currentProxyIndex === undefined
      ? 0
      : (store.currentProxyIndex + 1) % proxies.length;

  logger.info(
    `ℹ [${store.name}] Next proxy index: ${store.currentProxyIndex} / Count: ${proxies.length}`
  );

  return proxies[store.currentProxyIndex];
}

/**
 * Request-interception step for low-bandwidth mode.
 *
 * When `config.browser.lowBandwidth` is set, font and image requests are
 * aborted. Failures while aborting are ignored on purpose (best effort).
 *
 * @param request - The intercepted request.
 * @returns `true` when the request was consumed here (an abort was
 * attempted), `false` when the caller should keep processing it.
 */
async function handleLowBandwidth(request: Request) {
  const blockedTypes = new Set<string>(['font', 'image']);
  const shouldBlock =
    config.browser.lowBandwidth && blockedTypes.has(request.resourceType());

  if (!shouldBlock) {
    return false;
  }

  try {
    await request.abort();
  } catch {}

  return true;
}

/**
 * Request-interception step that sends a request through a proxy.
 *
 * With a proxy given, the request is handed to `useProxy`. If that fails, the
 * error is logged and the request is aborted (abort failures are ignored).
 *
 * @param request - The intercepted request.
 * @param proxy - Proxy to route through; when missing or empty the request
 * is left alone.
 * @returns `true` when a proxy was given (the request has been dealt with
 * here, successfully or not), `false` otherwise.
 */
async function handleProxy(request: Request, proxy?: string) {
  if (proxy) {
    try {
      await useProxy(request, proxy);
    } catch (error: unknown) {
      logger.error(error);
      // The proxied fetch failed; drop the request so it does not hang.
      try {
        await request.abort();
      } catch {}
    }

    return true;
  }

  return false;
}

/**
 * Request-interception step that lets the ad blocker's request handler
 * decide what happens to a request.
 *
 * The handler receives a proxy of `request` whose terminal methods are
 * replaced, so its decision can be observed instead of being applied
 * blindly:
 * - `continue` does NOT continue the real request; it only reports "not
 *   handled" so later interception steps can still act on it.
 * - `abort` aborts the real request, forwarding the handler's arguments.
 * - `respond` answers the real request, forwarding the handler's arguments.
 *
 * @param request - The intercepted request.
 * @param adBlockRequestHandler - The blocker's captured `'request'` listener;
 * when falsy this step is skipped.
 * @returns `true` when the handler aborted or answered the request, `false`
 * when it let the request through or no handler was supplied.
 */
async function handleAdBlock(request: Request, adBlockRequestHandler: any) {
  if (!adBlockRequestHandler) {
    return false;
  }

  return new Promise<boolean>((resolve) => {
    const continueFunc = async () => {
      resolve(false);
    };

    const abortFunc = async (...args: Parameters<Request['abort']>) => {
      try {
        // Forward the handler's arguments (e.g. its error code).
        await request.abort(...args);
      } catch {}

      resolve(true);
    };

    // NOTE(review): assumes the blocker may answer a request itself via
    // `respond` — confirm against the adblocker's request handler. Without
    // this wrapper the promise would never settle in that case.
    const respondFunc = async (...args: Parameters<Request['respond']>) => {
      try {
        await request.respond(...args);
      } catch {}

      resolve(true);
    };

    const requestProxy = new Proxy(request, {
      get(target, prop, receiver) {
        if (prop === 'continue') {
          return continueFunc;
        }

        if (prop === 'abort') {
          return abortFunc;
        }

        if (prop === 'respond') {
          return respondFunc;
        }

        return Reflect.get(target, prop, receiver);
      }
    });
    adBlockRequestHandler(requestProxy);
  });
}

/**
 * Responsible for looking up information about each product within
* a `Store`. It's important that we ignore `no-await-in-loop` here
Expand All @@ -34,6 +125,20 @@ async function lookup(browser: Browser, store: Store) {
return;
}

if (store.linksBuilder) {
logger.info(`[${store.name}] Running linksBuilder...`);
const lastRunTime = linkBuilderLastRunTimes[store.name] ?? -1;
const ttl = store.linksBuilder.ttl ?? Number.MAX_SAFE_INTEGER;
if (lastRunTime === -1 || Date.now() - lastRunTime > ttl) {
try {
await fetchLinks(store, browser);
linkBuilderLastRunTimes[store.name] = Date.now();
} catch (error: unknown) {
logger.error(error);
}
}
}

/* eslint-disable no-await-in-loop */
for (const link of store.links) {
if (!filterStoreLink(link)) {
Expand All @@ -45,23 +150,62 @@ async function lookup(browser: Browser, store: Store) {
continue;
}

const context = config.browser.isIncognito
const proxy = nextProxy(store);

const useAdBlock = !config.browser.lowBandwidth && !store.disableAdBlocker;
const customContext = config.browser.isIncognito;

const context = customContext
? await browser.createIncognitoBrowserContext()
: browser.defaultBrowserContext();
const page = config.browser.isIncognito
? await context.newPage()
: await browser.newPage();
const page = await context.newPage();

page.setDefaultNavigationTimeout(config.page.timeout);
await page.setUserAgent(getRandomUserAgent());

if (store.disableAdBlocker) {
try {
await disableBlockerInPage(page);
} catch (error: unknown) {
logger.error(error);
}
let adBlockRequestHandler: any;
let pageProxy;
if (useAdBlock) {
const onProxyFunc = (event: keyof PageEventObj, handler: any) => {
if (event !== 'request') {
page.on(event, handler);
return;
}

adBlockRequestHandler = handler;
};

pageProxy = new Proxy(page, {
get(target, prop, receiver) {
if (prop === 'on') {
return onProxyFunc;
}

return Reflect.get(target, prop, receiver);
}
});
await enableBlockerInPage(pageProxy);
}

await page.setRequestInterception(true);
page.on('request', async (request) => {
if (await handleLowBandwidth(request)) {
return;
}

if (await handleAdBlock(request, adBlockRequestHandler)) {
return;
}

if (await handleProxy(request, proxy)) {
return;
}

try {
await request.continue();
} catch {}
});

let statusCode = 0;

try {
Expand All @@ -74,15 +218,19 @@ async function lookup(browser: Browser, store: Store) {
);
const client = await page.target().createCDPSession();
await client.send('Network.clearBrowserCookies');
await client.send('Network.clearBrowserCache');
// Await client.send('Network.clearBrowserCache');
}

if (pageProxy) {
await disableBlockerInPage(pageProxy);
}

// Must apply backoff before closing the page, e.g. if CloudFlare is
// used to detect bot traffic, it introduces a 5 second page delay
// before redirecting to the next page
await processBackoffDelay(store, link, statusCode);
await closePage(page);
if (config.browser.isIncognito) {
if (customContext) {
await context.close();
}
}
Expand Down Expand Up @@ -223,19 +371,6 @@ export async function tryLookupAndLoop(browser: Browser, store: Store) {
return;
}

if (getStores().has(store.name) && store.linksBuilder) {
const lastRunTime = linkBuilderLastRunTimes[store.name] ?? -1;
const ttl = store.linksBuilder.ttl ?? Number.MAX_SAFE_INTEGER;
if (lastRunTime === -1 || Date.now() - lastRunTime > ttl) {
try {
await fetchLinks(store, browser);
linkBuilderLastRunTimes[store.name] = Date.now();
} catch (error: unknown) {
logger.error((error as Error).message);
}
}
}

logger.debug(`[${store.name}] Starting lookup...`);
try {
await lookup(browser, store);
Expand Down
1 change: 1 addition & 0 deletions src/store/model/index.ts
Expand Up @@ -236,6 +236,7 @@ export function updateStores() {
stores.set(storeData.name, store);
store.minPageSleep = storeData.minPageSleep;
store.maxPageSleep = storeData.maxPageSleep;
store.proxyList = storeData.proxyList;
} else {
logger.warn(`No store named ${storeData.name}, skipping.`);
}
Expand Down
5 changes: 4 additions & 1 deletion src/store/model/store.ts
@@ -1,4 +1,4 @@
import {Browser, LoadEvent} from 'puppeteer';
import {Browser, BrowserContext, LoadEvent} from 'puppeteer';

export type Element = {
container?: string;
Expand Down Expand Up @@ -178,4 +178,7 @@ export type Store = {
waitUntil?: LoadEvent;
minPageSleep?: number;
maxPageSleep?: number;

proxyList?: string[];
currentProxyIndex?: number;
};

0 comments on commit 490d44e

Please sign in to comment.