From 06f5611817a7a8acd41e2bc6284c979d9bd8e8cd Mon Sep 17 00:00:00 2001 From: Roki <35699619+Roki100@users.noreply.github.com> Date: Sat, 5 Mar 2022 15:56:28 +0100 Subject: [PATCH] feat: add IPv6 block rotating (#713) * IPv6 Rotating ^-^ * linter got mad at missing semicolon nice * Fixing format Fixing the format of files so eslint does not throw any errors. * adding colon * added test for IPv6 Block * Forgot to lint * Info test and download test improvement * use net instead if .includes * remove chunking-related stuff * Update lib/util.js Co-authored-by: fent * improve tests, fix the check in util * add test for invalid subnet * place done's on separate lines * fix typo in readme * fixing README.md * fixed mistake * fixed other mistake * Update README.md Co-authored-by: fent * Update README.md Co-authored-by: fent * fix conflicts * i forgot about this change * Fix undefined testInfo * ignoring invalid this * fix info test * fix million's await * remove useless part * Adding requested test * remove unrelated dependency * remove unused option * update readme * lint ipv6 example * remove ip6 dependencie * improve test coverage * (es)lint * Update lib/utils.js Co-authored-by: Voltrex Co-authored-by: MILLION Co-authored-by: Million900o Co-authored-by: fent Co-authored-by: MILLION <30964205+Million900o@users.noreply.github.com> Co-authored-by: TimeForANinja Co-authored-by: TimeForANinja Co-authored-by: Voltrex --- README.md | 19 +++++++ example/ipv6_rotating.js | 19 +++++++ lib/index.js | 7 +++ lib/info.js | 6 +++ lib/utils.js | 73 +++++++++++++++++++++++++ package.json | 4 +- test/download-test.js | 29 ++++++++++ test/full-info-test.js | 22 ++++++++ test/utils-test.js | 111 +++++++++++++++++++++++++++++++++++++++ typings/index.d.ts | 1 + 10 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 example/ipv6_rotating.js diff --git a/README.md b/README.md index d2cebd47..d563a944 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ Attempts to download a 
video from the given url. Returns a [readable stream](htt * `liveBuffer` - How much time buffer to use for live videos in milliseconds. Default is `20000`. * `highWaterMark` - How much of the video download to buffer into memory. See [node's docs](https://nodejs.org/api/stream.html#stream_constructor_new_stream_writable_options) for more. Defaults to 512KB. * `dlChunkSize` - When the chosen format is video only or audio only, the download is separated into multiple chunks to avoid throttling. This option specifies the size of each chunk in bytes. Setting it to 0 disables chunking. Defaults to 10MB. +* `IPv6Block` - IPv6 block to rotate through, an alternative to using a proxy. [Read more](#how-does-using-an-ipv6-block-help). Defaults to `undefined`. #### Event: info * [`ytdl.videoInfo`](typings/index.d.ts#L194) - Info. @@ -156,6 +157,24 @@ ytdl cannot download videos that fall into the following Generated download links are valid for 6 hours, and may only be downloadable from the same IP address. +### Ratelimits +When doing too many requests, YouTube might block you. This will result in your requests getting denied with HTTP status code 429. The following steps might help you: +* Update ytdl-core to the latest version +* Use proxies (you can find an example [here](https://github.com/fent/node-ytdl-core/blob/master/example/proxy.js)) +* Extend on the proxy idea by rotating (IPv6-)Addresses + * read [this](#how-does-using-an-ipv6-block-help) for more information about this +* Use cookies (you can find an example [here](https://github.com/fent/node-ytdl-core/blob/master/example/cookies.js)) + * for this to take effect you have to FIRST wait for the current ratelimit to expire +* Wait it out (it usually goes away within a few days) + +#### How does using an IPv6 block help? + +For request-intensive tasks it might be useful to spread your requests across multiple source IP-Addresses. 
Changing the source IP that you use is similar to using a proxy, except without bypassing restrictions such as a region lock. More IP-Addresses result in fewer requests per IP and therefore increase the total number of requests you can make before hitting the ratelimit. Since IPv4 Addresses are a limited resource we advise using IPv6. + +Using an IPv6 block is essentially having millions of IPv6 addresses at your disposal. In a /64 IPv6 block (which is usually the block given to a single household), there are 18,446,744,073,709,551,616 unique IPv6 addresses. This would allow you to make each request with a different IPv6 address. + +Even though using an IP-Block does help against ratelimits, it requires you to set up your host system to accept HTTP traffic for every address in the IP-Block. We cannot help you with the setup for any specific host / hosting provider but searching the internet most likely can. + ## Handling Separate Streams Typically 1080p or better videos do not have audio encoded with it. The audio must be downloaded separately and merged via an encoding library. `ffmpeg` is the most widely used tool, with many [Node.js modules available](https://www.npmjs.com/search?q=ffmpeg). Use the `format` objects returned from `ytdl.getInfo` to download specific streams to combine to fit your needs. Look at [example/ffmpeg.js](example/ffmpeg.js) for an example on doing this. 
diff --git a/example/ipv6_rotating.js b/example/ipv6_rotating.js new file mode 100644 index 00000000..f55de8d1 --- /dev/null +++ b/example/ipv6_rotating.js @@ -0,0 +1,19 @@ +const fs = require('fs'); +const path = require('path'); +const ytdl = require('..'); + +const options = { + quality: 'highest', + IPv6Block: '2001:2::/48', + // Example /48 block provided by: + // https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml +}; +const url = 'https://www.youtube.com/watch?v=WhXefyLs-uw'; +const output = path.resolve(__dirname, 'video.mp4'); + +const video = ytdl(url, options); +video.pipe(fs.createWriteStream(output)); +console.log('Downloading...'); +video.on('end', () => { + console.log('Finished downloading.'); +}); diff --git a/lib/index.js b/lib/index.js index 0cfbe5ba..9d4c7a21 100644 --- a/lib/index.js +++ b/lib/index.js @@ -97,6 +97,13 @@ const downloadFromInfoCallback = (stream, info, options) => { stream.emit('progress', chunk.length, downloaded, contentLength); }; + if (options.IPv6Block) { + options.requestOptions = Object.assign({}, options.requestOptions, { + family: 6, + localAddress: utils.getRandomIPv6(options.IPv6Block), + }); + } + // Download the file in chunks, in this case the default is 10MB, // anything over this will cause youtube to throttle the download const dlChunkSize = options.dlChunkSize || 1024 * 1024 * 10; diff --git a/lib/info.js b/lib/info.js index aed9617f..5ddd8c23 100644 --- a/lib/info.js +++ b/lib/info.js @@ -43,6 +43,12 @@ const AGE_RESTRICTED_URLS = [ * @returns {Promise} */ exports.getBasicInfo = async(id, options) => { + if (options.IPv6Block) { + options.requestOptions = Object.assign({}, options.requestOptions, { + family: 6, + localAddress: utils.getRandomIPv6(options.IPv6Block), + }); + } const retryOptions = Object.assign({}, miniget.defaultOptions, options.requestOptions); options.requestOptions = Object.assign({}, options.requestOptions, {}); 
options.requestOptions.headers = Object.assign({}, diff --git a/lib/utils.js b/lib/utils.js index 042de5ae..2bd5ae2c 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -181,3 +181,76 @@ exports.checkForUpdates = () => { } return null; }; + + +/** + * Gets random IPv6 Address from a block + * + * @param {string} ip the IPv6 block in CIDR-Notation + * @returns {string} + */ +exports.getRandomIPv6 = ip => { + // Start with a fast Regex-Check + if (!isIPv6(ip)) throw Error('Invalid IPv6 format'); + // Start by splitting and normalizing addr and mask + const [rawAddr, rawMask] = ip.split('/'); + let base10Mask = parseInt(rawMask); + if (!base10Mask || base10Mask > 128 || base10Mask < 24) throw Error('Invalid IPv6 subnet'); + const base10addr = normalizeIP(rawAddr); + // Get random addr to pad with + // using Math.random since we're not requiring high level of randomness + const randomAddr = new Array(8).fill(1).map(() => Math.floor(Math.random() * 0xffff)); + + // Merge base10addr with randomAddr + const mergedAddr = randomAddr.map((randomItem, idx) => { + // Calculate the amount of static bits + const staticBits = Math.min(base10Mask, 16); + // Adjust the bitmask with the staticBits + base10Mask -= staticBits; + // Calculate the bitmask + // lsb makes the calculation way more complicated + const mask = 0xffff - ((2 ** (16 - staticBits)) - 1); + // Combine base10addr and random + return (base10addr[idx] & mask) + (randomItem & (mask ^ 0xffff)); + }); + // Return new addr + return mergedAddr.map(x => x.toString('16')).join(':'); +}; + + +// eslint-disable-next-line max-len +const IPV6_REGEX = /^(([0-9a-f]{1,4}:)(:[0-9a-f]{1,4}){1,6}|([0-9a-f]{1,4}:){1,2}(:[0-9a-f]{1,4}){1,5}|([0-9a-f]{1,4}:){1,3}(:[0-9a-f]{1,4}){1,4}|([0-9a-f]{1,4}:){1,4}(:[0-9a-f]{1,4}){1,3}|([0-9a-f]{1,4}:){1,5}(:[0-9a-f]{1,4}){1,2}|([0-9a-f]{1,4}:){1,6}(:[0-9a-f]{1,4})|([0-9a-f]{1,4}:){1,7}(([0-9a-f]{1,4})|:))\/(1[0-1]\d|12[0-8]|\d{1,2})$/; +/** + * Quick check for a valid IPv6 + * The Regex only accepts 
a subset of all IPv6 Addresses + * + * @param {string} ip the IPv6 block in CIDR-Notation to test + * @returns {boolean} true if valid + */ +const isIPv6 = exports.isIPv6 = ip => IPV6_REGEX.test(ip); + + +/** + * Normalise an IP Address + * + * @param {string} ip the IPv6 Addr + * @returns {number[]} the 8 parts of the IPv6 as Integers + */ +const normalizeIP = exports.normalizeIP = ip => { + // Split by fill position + const parts = ip.split('::').map(x => x.split(':')); + // Normalize start and end + const partStart = parts[0] || []; + const partEnd = parts[1] || []; + partEnd.reverse(); + // Placeholder for full ip + const fullIP = new Array(8).fill(0); + // Fill in start and end parts + for (let i = 0; i < Math.min(partStart.length, 8); i++) { + fullIP[i] = parseInt(partStart[i], 16) || 0; + } + for (let i = 0; i < Math.min(partEnd.length, 8); i++) { + fullIP[7 - i] = parseInt(partEnd[i], 16) || 0; + } + return fullIP; +}; diff --git a/package.json b/package.json index 36764b23..b298249b 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,9 @@ "Andrew Kelley (https://github.com/andrewrk)", "Mauricio Allende (https://github.com/mallendeo)", "Rodrigo Altamirano (https://github.com/raltamirano)", - "Jim Buck (https://github.com/JimmyBoh)" + "Jim Buck (https://github.com/JimmyBoh)", + "Paweł Ruciński (https://github.com/Roki100)", + "Alexander Paolini (https://github.com/Million900o)" ], "main": "./lib/index.js", "types": "./typings/index.d.ts", diff --git a/test/download-test.js b/test/download-test.js index fb72082e..96d23fd7 100644 --- a/test/download-test.js +++ b/test/download-test.js @@ -5,6 +5,7 @@ const streamEqual = require('stream-equal'); const sinon = require('sinon'); const nock = require('./nock'); const ytdl = require('..'); +const net = require('net'); describe('Download video', () => { @@ -545,6 +546,34 @@ describe('Download video', () => { }); }); + describe('With IPv6 Block', () => { + it('Sends request with IPv6 address', done => { + const 
stream = ytdl.downloadFromInfo(expectedInfo, { IPv6Block: '2001:2::/48' }); + stream.on('info', (info, format) => { + nock.url(format.url).reply(function checkAddr() { + // "this" is assigned by the function checkAddr + // eslint-disable-next-line no-invalid-this + assert.ok(net.isIPv6(this.req.options.localAddress)); + done(); + }); + }); + }); + }); + + describe('Without IPv6 Block', () => { + it('Sends request with (default) IPv4 address', done => { + const stream = ytdl.downloadFromInfo(expectedInfo); + stream.on('info', (info, format) => { + nock.url(format.url).reply(function checkAddr() { + // "this" is assigned by the function checkAddr + // eslint-disable-next-line no-invalid-this + assert.ok(this.req.options.localAddress === undefined); + done(); + }); + }); + }); + }); + describe('with a bad filter', () => { it('Emits error', done => { const stream = ytdl.downloadFromInfo(expectedInfo, { filter: () => false }); diff --git a/test/full-info-test.js b/test/full-info-test.js index a99f020a..cfbbf278 100644 --- a/test/full-info-test.js +++ b/test/full-info-test.js @@ -2,6 +2,7 @@ const ytdl = require('..'); const assert = require('assert-diff'); const nock = require('./nock'); const miniget = require('miniget'); +const net = require('net'); describe('ytdl.getInfo()', () => { @@ -39,6 +40,27 @@ describe('ytdl.getInfo()', () => { }); }); + describe('With IPv6 Block', () => { + it('Sends request with IPv6 address', async() => { + const id = '_HSylqgVYQI'; + const scope = nock(id, 'regular'); + let info = await ytdl.getInfo(id, { IPv6Block: '2001:2::/48' }); + nock.url(info.formats[0].url).reply(function checkAddr() { + // "this" is assigned by the function checkAddr + // eslint-disable-next-line no-invalid-this + assert.ok(net.isIPv6(this.req.options.localAddress)); + scope.done(); + }); + }); + }); + + describe('With invalid IPv6 Block', () => { + it('Should give an error', async() => { + const id = '_HSylqgVYQI'; + await assert.rejects(ytdl.getInfo(id, { 
IPv6Block: '2001:2::/200' }), /Invalid IPv6 format/); + }); + }); + describe('From a video with a cipher', () => { it('Retrieves deciphered video formats', async() => { const id = 'B3eAMGXFw1o'; diff --git a/test/utils-test.js b/test/utils-test.js index f5e26d32..f7d9b12b 100644 --- a/test/utils-test.js +++ b/test/utils-test.js @@ -181,6 +181,117 @@ describe('utils.checkForUpdates', () => { }); }); +describe('utils.isIPv6', () => { + it('returns true for valid IPv6 net', () => { + assert.ok(utils.isIPv6('100::/128')); + assert.ok(utils.isIPv6('100::/119')); + assert.ok(utils.isIPv6('100::/13')); + assert.ok(utils.isIPv6('100::/1')); + assert.ok(utils.isIPv6('20a::/13')); + assert.ok(utils.isIPv6('0064:ff9b:0000:0000:0000:0000:1234:5678/13')); + assert.ok(utils.isIPv6('0064:ff9b:0001:1122:0033:4400:0000:0001/13')); + assert.ok(utils.isIPv6('fe80:4:6c:8c74:0000:5efe:afef:a89/13')); + assert.ok(utils.isIPv6('fe80:4:6c:8c74:0000:5efe::a89/13')); + assert.ok(utils.isIPv6('fe80:4:6c:8c74:0000::a89/13')); + assert.ok(utils.isIPv6('fe80:4:6c:8c74::a89/13')); + assert.ok(utils.isIPv6('fe80:4:6c::a89/13')); + assert.ok(utils.isIPv6('fe80:4::a89/13')); + assert.ok(utils.isIPv6('fe80::a89/13')); + assert.ok(utils.isIPv6('fe80::/13')); + assert.ok(utils.isIPv6('fea3:c65:43ee:54:e2a:2357:4ac4:732/13')); + assert.ok(utils.isIPv6('fe80:1234:abc/13')); + assert.ok(utils.isIPv6('20a:1234::1/13')); + }); + + it('returns false for valid but unwanted IPv6 net', () => { + assert.ok(!utils.isIPv6('::/1')); + assert.ok(!utils.isIPv6('::1/1')); + assert.ok(!utils.isIPv6('::ffff:10.0.0.3/1')); + assert.ok(!utils.isIPv6('::10.0.0.3/1')); + assert.ok(!utils.isIPv6('127.0.0.1/1')); + assert.ok(!utils.isIPv6('24a6:57:c:36cf:0000:5efe:109.205.140.116/64')); + }); + + it('returns false for invalid IPv6 net', () => { + assert.ok(!utils.isIPv6('100::/129')); + assert.ok(!utils.isIPv6('100::/130')); + assert.ok(!utils.isIPv6('100::/abc')); + assert.ok(!utils.isIPv6('100::')); + 
assert.ok(!utils.isIPv6('fe80:4::8c74::5efe:afef:a89/64')); + assert.ok(!utils.isIPv6('24a6:57:c:36cf:0000:5efe:ab:cd:ef/64')); + assert.ok(!utils.isIPv6('24a6:57:c:36cf:0000:5efe::ab:cd/64')); + }); +}); + +describe('utils.getRandomIPv6', () => { + it('errors for completely invalid ipv6', () => { + assert.throws(() => { + utils.getRandomIPv6('some random string'); + }, /Invalid IPv6 format/); + }); + + it('errors for invalid subnet sizes', () => { + assert.throws(() => { + utils.getRandomIPv6('fe80::/300'); + }, /Invalid IPv6 format/); + assert.throws(() => { + utils.getRandomIPv6('127::1/1'); + }, /Invalid IPv6 subnet/); + assert.throws(() => { + utils.getRandomIPv6('fe80::'); + }, /Invalid IPv6 format/); + assert.throws(() => { + utils.getRandomIPv6('fe80::/ff'); + }, /Invalid IPv6 format/); + }); + + it('keeps the upper bits of the subnet', () => { + for (let i = 24; i < 128; i++) { + const ip = utils.getRandomIPv6(`ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/${i}`); + const bits = ip.split(':').map(x => parseInt(x, 16).toString(2)).join(''); + assert.equal(bits.substr(0, i), '1'.repeat(i)); + } + }); + + it('rolls random bits for the lower bits', () => { + // Only testing to 64 and not 128 + // The second part of the random IP is tested to not be only onces + // and rolling 8 full 0xff bytes should be unlikely enough + for (let i = 24; i < 64; i++) { + const ip = utils.getRandomIPv6(`ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/${i}`); + const bits = ip.split(':').map(x => parseInt(x, 16).toString(2)).join(''); + assert.ok(bits.substr(i).split('').some(x => x === '0')); + } + }); +}); + +describe('utils.normalizeIP', () => { + it('does work for already expanded ips', () => { + assert.deepEqual(utils.normalizeIP('1:2:3:4:5:6:7:8'), [1, 2, 3, 4, 5, 6, 7, 8]); + }); + + it('resolves bytes to integers', () => { + assert.deepEqual(utils.normalizeIP('ffff'), [65535, 0, 0, 0, 0, 0, 0, 0]); + }); + + it('expands ::', () => { + assert.deepEqual(utils.normalizeIP('ab::cd'), 
[171, 0, 0, 0, 0, 0, 0, 205]); + assert.deepEqual(utils.normalizeIP('ab:cd::ef'), [171, 205, 0, 0, 0, 0, 0, 239]); + assert.deepEqual(utils.normalizeIP('ab:cd::12:ef'), [171, 205, 0, 0, 0, 0, 18, 239]); + assert.deepEqual(utils.normalizeIP('ab:cd::'), [171, 205, 0, 0, 0, 0, 0, 0]); + assert.deepEqual(utils.normalizeIP('123::'), [291, 0, 0, 0, 0, 0, 0, 0]); + assert.deepEqual(utils.normalizeIP('0::'), [0, 0, 0, 0, 0, 0, 0, 0]); + assert.deepEqual(utils.normalizeIP('::'), [0, 0, 0, 0, 0, 0, 0, 0]); + assert.deepEqual(utils.normalizeIP('::ab:cd'), [0, 0, 0, 0, 0, 0, 171, 205]); + }); + + it('does handle invalid ips', () => { + assert.deepEqual(utils.normalizeIP('1:2:3:4:5::6:7:8::'), [1, 2, 3, 4, 5, 6, 7, 8]); + assert.deepEqual(utils.normalizeIP('::1:2:3:4:5:6:7:8'), [1, 2, 3, 4, 5, 6, 7, 8]); + assert.deepEqual(utils.normalizeIP('1:2:3:4:5::6:7:8:9:10'), [1, 2, 3, 6, 7, 8, 9, 16]); + }); +}); + describe('utils.exposedMiniget', () => { it('does not error with undefined requestOptionsOverwrite', async() => { const scope = nock('https://test.com').get('/').reply(200, 'nice'); diff --git a/typings/index.d.ts b/typings/index.d.ts index 17d9f63d..2a082653 100644 --- a/typings/index.d.ts +++ b/typings/index.d.ts @@ -25,6 +25,7 @@ declare module 'ytdl-core' { begin?: string | number | Date; liveBuffer?: number; highWaterMark?: number; + IPv6Block?: string; dlChunkSize?: number; }