Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add proxy rotation (settable per store) #1026

Merged
merged 5 commits into from
Dec 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
311 changes: 304 additions & 7 deletions package-lock.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
"puppeteer-extra-plugin-adblocker": "^2.11.9",
"puppeteer-extra-plugin-block-resources": "^2.2.7",
"puppeteer-extra-plugin-stealth": "^2.6.5",
"puppeteer-page-proxy": "^1.2.8",
"pushover-notifications": "^1.2.2",
"twilio": "^3.52.0",
"twitch": "^4.3.2",
Expand Down
15 changes: 13 additions & 2 deletions src/adblocker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,20 @@ export const adBlocker = new PuppeteerExtraPluginAdblocker({
blockTrackers: true
});

/**
 * Turns the ad blocker on for `page` unless blocking is already enabled
 * for it.
 *
 * @param page - Page to enable request blocking on.
 */
export async function enableBlockerInPage(page: Page) {
  const blockerObject = await adBlocker.getBlocker();
  if (blockerObject.isBlockingEnabled(page)) {
    // Already blocking on this page - nothing to do.
    return;
  }

  await blockerObject.enableBlockingInPage(page);
}

/**
 * Switches the ad blocker off for `page`. A page that is not currently
 * being blocked is left untouched.
 *
 * @param page - Page to stop request blocking on.
 */
export async function disableBlockerInPage(page: Page) {
  const blocker = await adBlocker.getBlocker();
  const isActive = blocker.isBlockingEnabled(page);

  if (isActive) {
    await blocker.disableBlockingInPage(page);
  }
}
14 changes: 13 additions & 1 deletion src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {banner} from './banner';

import {config as config_} from 'dotenv';
import path from 'path';
import {readFileSync} from 'fs';

config_({path: path.resolve(__dirname, '../.env')});

Expand Down Expand Up @@ -354,6 +355,16 @@ const store = {
]),
stores: envOrArray(process.env.STORES, ['nvidia']).map((entry) => {
const [name, minPageSleep, maxPageSleep] = entry.match(/[^:]+/g) ?? [];

let proxyList;
try {
proxyList = readFileSync(`${name}.proxies`)
.toString()
.trim()
.split('\n')
.map((x) => x.trim());
} catch {}

return {
maxPageSleep: envOrNumberMax(
minPageSleep,
Expand All @@ -365,7 +376,8 @@ const store = {
maxPageSleep,
browser.minSleep
),
name: envOrString(name)
name: envOrString(name),
proxyList
};
})
};
Expand Down
11 changes: 0 additions & 11 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,14 @@
import {startAPIServer, stopAPIServer} from './web';
import {Browser} from 'puppeteer';
import {adBlocker} from './adblocker';
import {config} from './config';
import {getSleepTime} from './util';
import {logger} from './logger';
import puppeteer from 'puppeteer-extra';
import resourceBlock from 'puppeteer-extra-plugin-block-resources';
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
import {storeList} from './store/model';
import {tryLookupAndLoop} from './store';

puppeteer.use(stealthPlugin());
if (config.browser.lowBandwidth) {
puppeteer.use(
resourceBlock({
blockedTypes: new Set(['image', 'font'] as const)
})
);
} else {
puppeteer.use(adBlocker);
}

let browser: Browser | undefined;

Expand Down
189 changes: 162 additions & 27 deletions src/store/lookup.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {Browser, Page, Response} from 'puppeteer';
import {Browser, Page, PageEventObj, Request, Response} from 'puppeteer';
import {Link, Store, getStores} from './model';
import {Print, logger} from '../logger';
import {Selector, cardPrice, pageIncludesLabels} from './includes-labels';
Expand All @@ -9,18 +9,109 @@ import {
getSleepTime,
isStatusCodeInRange
} from '../util';
import {disableBlockerInPage, enableBlockerInPage} from '../adblocker';
import {config} from '../config';
import {disableBlockerInPage} from '../adblocker';
import {fetchLinks} from './fetch-links';
import {filterStoreLink} from './filter';
import open from 'open';
import {processBackoffDelay} from './model/helpers/backoff';
import {sendNotification} from '../notification';
import useProxy from 'puppeteer-page-proxy';

// Boolean in-stock flags keyed by string.
// NOTE(review): no reads or writes are visible in this excerpt - presumably
// keyed by link URL; confirm against the rest of the file.
const inStock: Record<string, boolean> = {};

// Timestamp (Date.now()) of the last successful fetchLinks run, keyed by
// store name; compared against linksBuilder.ttl to decide when to re-run.
const linkBuilderLastRunTimes: Record<string, number> = {};

/**
 * Advances the store's proxy rotation and returns the proxy to use next.
 *
 * The first call for a store yields the first entry of `proxyList`; each
 * later call moves one entry forward and wraps back to the start after the
 * last entry. `store.currentProxyIndex` is updated in place so that it
 * always points at the proxy that was just returned.
 *
 * @param store - Store whose `proxyList` is rotated.
 * @returns The selected proxy, or `undefined` when the store has no proxies.
 */
function nextProxy(store: Store) {
  const proxies = store.proxyList;
  if (!proxies || proxies.length === 0) {
    return;
  }

  // Start at index 0 on first use; afterwards step forward. The modulo also
  // pulls the index back in range if the list shrank since the last call.
  store.currentProxyIndex =
    store.currentProxyIndex === undefined
      ? 0
      : (store.currentProxyIndex + 1) % proxies.length;

  logger.info(
    `ℹ [${store.name}] Next proxy index: ${store.currentProxyIndex} / Count: ${proxies.length}`
  );

  return proxies[store.currentProxyIndex];
}

/**
 * Drops font and image requests when low-bandwidth mode is switched on in
 * the config.
 *
 * @param request - Intercepted request to inspect.
 * @returns `true` when the request was claimed here (an abort was
 * attempted) and needs no further handling, `false` otherwise.
 */
async function handleLowBandwidth(request: Request) {
  if (!config.browser.lowBandwidth) {
    return false;
  }

  const resourceType = request.resourceType();
  if (resourceType !== 'font' && resourceType !== 'image') {
    return false;
  }

  try {
    await request.abort();
  } catch {
    // Best effort: a failed abort is deliberately ignored and the request
    // is still reported as handled.
  }

  return true;
}

/**
 * Sends the intercepted request through `proxy` when one is supplied.
 *
 * If proxying fails, the error is logged and an abort of the request is
 * attempted; the request still counts as handled in that case.
 *
 * @param request - Intercepted request to forward.
 * @param proxy - Proxy to use; when empty or missing nothing is done.
 * @returns `true` if a proxy was supplied (the request has been dealt with
 * here), `false` if the caller still has to handle the request.
 */
async function handleProxy(request: Request, proxy?: string) {
  if (proxy) {
    try {
      await useProxy(request, proxy);
    } catch (proxyError: unknown) {
      logger.error(proxyError);
      try {
        await request.abort();
      } catch {}
    }

    return true;
  }

  return false;
}

/**
 * Runs the captured ad-block request handler against `request` and reports
 * whether the handler took care of the request.
 *
 * The handler is given a proxy around the request so that its decision can
 * be observed instead of being applied blindly:
 * - `continue()` is swallowed (the real request is NOT continued) and the
 *   result is `false`, leaving the request for the caller to finish;
 * - `abort()` aborts the real request and the result is `true`;
 * - `respond(...)` is forwarded to the real request and the result is
 *   `true`, so a handler that answers the request itself cannot leave the
 *   returned promise pending forever.
 * Every other property is read straight from the real request.
 *
 * @param request - Intercepted request to evaluate.
 * @param adBlockRequestHandler - Request listener captured from the ad
 * blocker; when falsy, ad blocking is skipped.
 * @returns `true` if the request was aborted or answered by the handler,
 * `false` if the caller should go on processing it.
 */
async function handleAdBlock(request: Request, adBlockRequestHandler: any) {
  if (!adBlockRequestHandler) {
    return false;
  }

  return new Promise<boolean>((resolve) => {
    const continueFunc = async () => {
      resolve(false);
    };

    const abortFunc = async () => {
      try {
        await request.abort();
      } catch {
        // Best effort: a failed abort is ignored, the request is still
        // reported as handled.
      }

      resolve(true);
    };

    const requestProxy = new Proxy(request, {
      get(target, prop, receiver) {
        if (prop === 'continue') {
          return continueFunc;
        }

        if (prop === 'abort') {
          return abortFunc;
        }

        if (prop === 'respond') {
          return async (...args: unknown[]) => {
            try {
              await Reflect.apply(
                Reflect.get(target, prop, receiver),
                target,
                args
              );
            } catch {
              // Best effort, same as abort above.
            }

            resolve(true);
          };
        }

        return Reflect.get(target, prop, receiver);
      }
    });
    adBlockRequestHandler(requestProxy);
  });
}

/**
* Responsible for looking up information about each product within
* a `Store`. It's important that we ignore `no-await-in-loop` here
Expand All @@ -34,6 +125,20 @@ async function lookup(browser: Browser, store: Store) {
return;
}

if (store.linksBuilder) {
logger.info(`[${store.name}] Running linksBuilder...`);
const lastRunTime = linkBuilderLastRunTimes[store.name] ?? -1;
const ttl = store.linksBuilder.ttl ?? Number.MAX_SAFE_INTEGER;
if (lastRunTime === -1 || Date.now() - lastRunTime > ttl) {
try {
await fetchLinks(store, browser);
linkBuilderLastRunTimes[store.name] = Date.now();
} catch (error: unknown) {
logger.error(error);
}
}
}

/* eslint-disable no-await-in-loop */
for (const link of store.links) {
if (!filterStoreLink(link)) {
Expand All @@ -45,23 +150,62 @@ async function lookup(browser: Browser, store: Store) {
continue;
}

const context = config.browser.isIncognito
const proxy = nextProxy(store);

const useAdBlock = !config.browser.lowBandwidth && !store.disableAdBlocker;
const customContext = config.browser.isIncognito;

const context = customContext
? await browser.createIncognitoBrowserContext()
: browser.defaultBrowserContext();
const page = config.browser.isIncognito
? await context.newPage()
: await browser.newPage();
const page = await context.newPage();

page.setDefaultNavigationTimeout(config.page.timeout);
await page.setUserAgent(getRandomUserAgent());

if (store.disableAdBlocker) {
try {
await disableBlockerInPage(page);
} catch (error: unknown) {
logger.error(error);
}
let adBlockRequestHandler: any;
let pageProxy;
if (useAdBlock) {
const onProxyFunc = (event: keyof PageEventObj, handler: any) => {
if (event !== 'request') {
page.on(event, handler);
return;
}

adBlockRequestHandler = handler;
};

pageProxy = new Proxy(page, {
get(target, prop, receiver) {
if (prop === 'on') {
return onProxyFunc;
}

return Reflect.get(target, prop, receiver);
}
});
await enableBlockerInPage(pageProxy);
}

await page.setRequestInterception(true);
page.on('request', async (request) => {
if (await handleLowBandwidth(request)) {
return;
}

if (await handleAdBlock(request, adBlockRequestHandler)) {
return;
}

if (await handleProxy(request, proxy)) {
return;
}

try {
await request.continue();
} catch {}
});

let statusCode = 0;

try {
Expand All @@ -74,15 +218,19 @@ async function lookup(browser: Browser, store: Store) {
);
const client = await page.target().createCDPSession();
await client.send('Network.clearBrowserCookies');
await client.send('Network.clearBrowserCache');
// Await client.send('Network.clearBrowserCache');
}

if (pageProxy) {
await disableBlockerInPage(pageProxy);
}

// Must apply backoff before closing the page, e.g. if CloudFlare is
// used to detect bot traffic, it introduces a 5 second page delay
// before redirecting to the next page
await processBackoffDelay(store, link, statusCode);
await closePage(page);
if (config.browser.isIncognito) {
if (customContext) {
await context.close();
}
}
Expand Down Expand Up @@ -223,19 +371,6 @@ export async function tryLookupAndLoop(browser: Browser, store: Store) {
return;
}

if (getStores().has(store.name) && store.linksBuilder) {
const lastRunTime = linkBuilderLastRunTimes[store.name] ?? -1;
const ttl = store.linksBuilder.ttl ?? Number.MAX_SAFE_INTEGER;
if (lastRunTime === -1 || Date.now() - lastRunTime > ttl) {
try {
await fetchLinks(store, browser);
linkBuilderLastRunTimes[store.name] = Date.now();
} catch (error: unknown) {
logger.error((error as Error).message);
}
}
}

logger.debug(`[${store.name}] Starting lookup...`);
try {
await lookup(browser, store);
Expand Down
1 change: 1 addition & 0 deletions src/store/model/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ export function updateStores() {
stores.set(storeData.name, store);
store.minPageSleep = storeData.minPageSleep;
store.maxPageSleep = storeData.maxPageSleep;
store.proxyList = storeData.proxyList;
} else {
logger.warn(`No store named ${storeData.name}, skipping.`);
}
Expand Down
5 changes: 4 additions & 1 deletion src/store/model/store.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {Browser, LoadEvent} from 'puppeteer';
import {Browser, BrowserContext, LoadEvent} from 'puppeteer';

export type Element = {
container?: string;
Expand Down Expand Up @@ -178,4 +178,7 @@ export type Store = {
waitUntil?: LoadEvent;
minPageSleep?: number;
maxPageSleep?: number;

proxyList?: string[];
currentProxyIndex?: number;
};