From 9331f5b6244bcd7875b93f0326bc09330d21330e Mon Sep 17 00:00:00 2001 From: fent <933490+fent@users.noreply.github.com> Date: Sun, 24 Jan 2021 15:14:26 -0700 Subject: [PATCH] refactor: use `URL` object to parse urls --- lib/info-extras.js | 21 ++++++++++----------- lib/info.js | 36 +++++++++++++++--------------------- lib/sig.js | 17 ++++++----------- lib/url-utils.js | 11 +++++++---- test/download-test.js | 4 ++-- test/files/refresh.js | 4 ++-- test/nock.js | 12 ++++++------ 7 files changed, 48 insertions(+), 57 deletions(-) diff --git a/lib/info-extras.js b/lib/info-extras.js index ee6d3252..b861d3b4 100644 --- a/lib/info-extras.js +++ b/lib/info-extras.js @@ -1,11 +1,10 @@ const utils = require('./utils'); const qs = require('querystring'); -const urllib = require('url'); const { URL } = require('url'); const { parseTimestamp } = require('m3u8stream'); -const VIDEO_URL = 'https://www.youtube.com/watch?v='; +const BASE_URL = 'https://www.youtube.com/watch?v='; const TITLE_TO_CATEGORY = { song: { name: 'Music', url: 'https://music.youtube.com/' }, }; @@ -42,8 +41,8 @@ exports.getMedia = info => { media[title] = getText(contents); let runs = contents.runs; if (runs && runs[0].navigationEndpoint) { - media[`${title}_url`] = urllib.resolve(VIDEO_URL, - runs[0].navigationEndpoint.commandMetadata.webCommandMetadata.url); + media[`${title}_url`] = new URL( + runs[0].navigationEndpoint.commandMetadata.webCommandMetadata.url, BASE_URL).toString(); } if (title in TITLE_TO_CATEGORY) { media.category = TITLE_TO_CATEGORY[title].name; @@ -58,8 +57,8 @@ exports.getMedia = info => { media.year = getText(meta.subtitle); let type = getText(meta.callToAction).split(' ')[1]; media[type] = getText(meta.title); - media[`${type}_url`] = urllib.resolve(VIDEO_URL, - meta.endpoint.commandMetadata.webCommandMetadata.url); + media[`${type}_url`] = new URL( + meta.endpoint.commandMetadata.webCommandMetadata.url, BASE_URL).toString(); media.thumbnails = meta.thumbnail.thumbnails; } let topic = contents @@ -67,8 +66,8 @@ exports.getMedia = info => { for (let { richMetadataRenderer } of topic) { let meta = richMetadataRenderer; media.category = getText(meta.title); - media.category_url = urllib.resolve(VIDEO_URL, - meta.endpoint.commandMetadata.webCommandMetadata.url); + media.category_url = new URL( + meta.endpoint.commandMetadata.webCommandMetadata.url, BASE_URL).toString(); } } } @@ -100,7 +99,7 @@ exports.getAuthor = info => { let videoOwnerRenderer = v.videoSecondaryInfoRenderer.owner.videoOwnerRenderer; channelId = videoOwnerRenderer.navigationEndpoint.browseEndpoint.browseId; thumbnails = videoOwnerRenderer.thumbnail.thumbnails.map(thumbnail => { - thumbnail.url = urllib.resolve(VIDEO_URL, thumbnail.url); + thumbnail.url = new URL(thumbnail.url, BASE_URL).toString(); return thumbnail; }); subscriberCount = utils.parseAbbreviatedNumber(getText(videoOwnerRenderer.subscriberCountText)); @@ -117,7 +116,7 @@ exports.getAuthor = info => { user: videoDetails ? videoDetails.ownerProfileUrl.split('/').slice(-1)[0] : null, channel_url: `https://www.youtube.com/channel/${id}`, external_channel_url: videoDetails ? `https://www.youtube.com/channel/${videoDetails.externalChannelId}` : '', - user_url: videoDetails ? urllib.resolve(VIDEO_URL, videoDetails.ownerProfileUrl) : '', + user_url: videoDetails ? new URL(videoDetails.ownerProfileUrl, BASE_URL).toString() : '', thumbnails, verified, subscriber_count: subscriberCount, @@ -156,7 +155,7 @@ const parseRelatedVideo = (details, rvsParams) => { channel_url: `https://www.youtube.com/channel/${channelId}`, user_url: `https://www.youtube.com/user/${user}`, thumbnails: details.channelThumbnail.thumbnails.map(thumbnail => { - thumbnail.url = urllib.resolve(VIDEO_URL, thumbnail.url); + thumbnail.url = new URL(thumbnail.url, BASE_URL).toString(); return thumbnail; }), verified: isVerified(details.ownerBadges), diff --git a/lib/info.js b/lib/info.js index 67a58f91..54ee4b80 100644 --- a/lib/info.js +++ b/lib/info.js @@ -1,4 +1,4 @@ -const urllib = require('url'); +const { URL } = require('url'); const querystring = require('querystring'); const sax = require('sax'); const miniget = require('miniget'); @@ -12,7 +12,7 @@ const sig = require('./sig'); const Cache = require('./cache'); -const VIDEO_URL = 'https://www.youtube.com/watch?v='; +const BASE_URL = 'https://www.youtube.com/watch?v='; // Cached for storing basic/full info. @@ -80,7 +80,7 @@ exports.getBasicInfo = async(id, options) => { age_restricted: !!(media && media.notice_url && AGE_RESTRICTED_URLS.some(url => media.notice_url.includes(url))), // Give the standard link to the video. - video_url: VIDEO_URL + id, + video_url: BASE_URL + id, storyboards: extras.getStoryboards(info), }; @@ -116,7 +116,7 @@ const isNotYetBroadcasted = player_response => { }; -const getWatchHTMLURL = (id, options) => `${VIDEO_URL + id}&hl=${options.lang || 'en'}`; +const getWatchHTMLURL = (id, options) => `${BASE_URL + id}&hl=${options.lang || 'en'}`; const getWatchHTMLPageBody = (id, options) => { const url = getWatchHTMLURL(id, options); return exports.watchPageCache.getOrSet(url, () => miniget(url, options.requestOptions).text()); @@ -323,19 +323,13 @@ const INFO_HOST = 'www.youtube.com'; const INFO_PATH = '/get_video_info'; const VIDEO_EURL = 'https://youtube.googleapis.com/v/'; const getVideoInfoPage = async(id, options) => { - const url = urllib.format({ - protocol: 'https', - host: INFO_HOST, - pathname: INFO_PATH, - query: { - video_id: id, - eurl: VIDEO_EURL + id, - ps: 'default', - gl: 'US', - hl: options.lang || 'en', - }, - }); - let body = await miniget(url, options.requestOptions).text(); + const url = new URL(`https://${INFO_HOST}${INFO_PATH}`); + url.searchParams.set('video_id', id); + url.searchParams.set('eurl', VIDEO_EURL + id); + url.searchParams.set('ps', 'default'); + url.searchParams.set('gl', 'US'); + url.searchParams.set('hl', options.lang || 'en'); + let body = await miniget(url.toString(), options.requestOptions).text(); let info = querystring.parse(body); info.player_response = findPlayerResponse('get_video_info', info); return info; @@ -378,7 +372,7 @@ exports.getInfo = async(id, options) => { if (!info.html5player) { throw Error('Unable to find html5player file'); } - const html5player = urllib.resolve(VIDEO_URL, info.html5player); + const html5player = new URL(info.html5player, BASE_URL).toString(); funcs.push(sig.decipherFormats(info.formats, html5player, options)); } if (hasManifest && info.player_response.streamingData.dashManifestUrl) { @@ -432,7 +426,7 @@ const getDashManifest = (url, options) => new Promise((resolve, reject) => { } }; parser.onend = () => { resolve(formats); }; - const req = miniget(urllib.resolve(VIDEO_URL, url), options.requestOptions); + const req = miniget(new URL(url, BASE_URL).toString(), options.requestOptions); req.setEncoding('utf8'); req.on('error', reject); req.on('data', chunk => { parser.write(chunk); }); @@ -448,8 +442,8 @@ const getDashManifest = (url, options) => new Promise((resolve, reject) => { * @returns {Promise>} */ const getM3U8 = async(url, options) => { - url = urllib.resolve(VIDEO_URL, url); - let body = await miniget(url, options.requestOptions).text(); + url = new URL(url, BASE_URL); + let body = await miniget(url.toString(), options.requestOptions).text(); let formats = {}; body .split('\n') diff --git a/lib/sig.js b/lib/sig.js index 3c812b43..70f227da 100644 --- a/lib/sig.js +++ b/lib/sig.js @@ -1,4 +1,4 @@ -const url = require('url'); +const { URL } = require('url'); const miniget = require('miniget'); const querystring = require('querystring'); const Cache = require('./cache'); @@ -205,25 +205,20 @@ exports.setDownloadURL = (format, sig) => { } // Make some adjustments to the final url. - const parsedUrl = url.parse(decodedUrl, true); - - // Deleting the `search` part is necessary otherwise changes to - // `query` won't reflect when running `url.format()` - delete parsedUrl.search; - - let query = parsedUrl.query; + const parsedUrl = new URL(decodedUrl); // This is needed for a speedier download. // See https://github.com/fent/node-ytdl-core/issues/127 - query.ratebypass = 'yes'; + parsedUrl.searchParams.set('ratebypass', 'yes'); + if (sig) { // When YouTube provides a `sp` parameter the signature `sig` must go // into the parameter it specifies. // See https://github.com/fent/node-ytdl-core/issues/417 - query[format.sp || 'signature'] = sig; + parsedUrl.searchParams.set(format.sp || 'signature', sig); } - format.url = url.format(parsedUrl); + format.url = parsedUrl.toString(); }; diff --git a/lib/url-utils.js b/lib/url-utils.js index 01dedecd..dc229804 100644 --- a/lib/url-utils.js +++ b/lib/url-utils.js @@ -1,4 +1,4 @@ -const url = require('url'); +const { URL } = require('url'); /** @@ -27,8 +27,8 @@ const validQueryDomains = new Set([ ]); const validPathDomains = /^https?:\/\/(youtu\.be\/|(www\.)?youtube.com\/(embed|v)\/)/; exports.getURLVideoID = link => { - const parsed = url.parse(link, true); - let id = parsed.query.v; + const parsed = new URL(link); + let id = parsed.searchParams.get('v'); if (validPathDomains.test(link) && !id) { const paths = parsed.pathname.split('/'); id = paths[paths.length - 1]; @@ -56,11 +56,14 @@ exports.getURLVideoID = link => { * @throws {Error} If unable to find a id * @throws {TypeError} If videoid doesn't match specs */ +const urlRegex = /^https?:\/\//; exports.getVideoID = str => { if (exports.validateID(str)) { return str; - } else { + } else if (urlRegex.test(str)) { return exports.getURLVideoID(str); + } else { + throw Error(`No video id found: ${str}`); } }; diff --git a/test/download-test.js b/test/download-test.js index f38e489c..3e49e859 100644 --- a/test/download-test.js +++ b/test/download-test.js @@ -1,7 +1,7 @@ const assert = require('assert'); const path = require('path'); const fs = require('fs'); -const url = require('url'); +const { URL } = require('url'); const streamEqual = require('stream-equal'); const sinon = require('sinon'); const nock = require('./nock'); @@ -575,7 +575,7 @@ describe('Download video', () => { '/file03.ts', '#EXT-X-ENDLIST', ].join('\n')); - const host = url.parse(format.url).host; + const host = new URL(format.url).host; scope.urlReply(`https://${host}/file01.ts`, 200, 'one', { 'content-length': '3', }); diff --git a/test/files/refresh.js b/test/files/refresh.js index d090a793..1f1d8461 100644 --- a/test/files/refresh.js +++ b/test/files/refresh.js @@ -117,7 +117,7 @@ const videos = [ const fs = require('fs'); const path = require('path'); -const urlParse = require('url').parse; +const { URL } = require('url'); const { PassThrough } = require('stream'); const mukRequire = require('muk-require'); const miniget = require('miniget'); @@ -176,7 +176,7 @@ const refreshVideo = async(video, noRequests) => { }; const getFilenameFromURL = url => { - let parsed = urlParse(url); + let parsed = new URL(url); let s = parsed.pathname.split('/'); let filename = // Special case for livestream manifest files. diff --git a/test/nock.js b/test/nock.js index b88efacc..b80e1a82 100644 --- a/test/nock.js +++ b/test/nock.js @@ -1,7 +1,7 @@ const ytdl = require('..'); const path = require('path'); const fs = require('fs'); -const url = require('url'); +const { URL } = require('url'); const nock = require('nock'); const YT_HOST = 'https://www.youtube.com'; @@ -144,15 +144,15 @@ exports = module.exports = (id, type, opts = {}) => { }; exports.filteringPath = (uri, filter1, filter2) => { - let parsed = url.parse(uri); - return nock(`${parsed.protocol}//${parsed.host}`) + let parsed = new URL(uri); + return nock(parsed.origin) .filteringPath(filter1, filter2) - .get(parsed.path); + .get(parsed.pathname + parsed.search + parsed.hash); }; exports.url = uri => { - let parsed = url.parse(uri); - return nock(`${parsed.protocol}//${parsed.host}`).get(parsed.path); + let parsed = new URL(uri); + return nock(parsed.origin).get(parsed.pathname + parsed.search + parsed.hash); }; exports.cleanAll = nock.cleanAll;