From 2b0d529dfabc19e017f12f7fefe8d1a9a482dd78 Mon Sep 17 00:00:00 2001 From: HaarigerHarald Date: Sat, 18 Jan 2020 22:42:42 +0100 Subject: [PATCH] Fix Extraction --- .../youtubeExtractor/ExtractorTestCases.java | 9 +- .../at/huber/youtubeExtractor/VideoMeta.java | 10 +- .../youtubeExtractor/YouTubeExtractor.java | 260 ++++++------------ 3 files changed, 103 insertions(+), 176 deletions(-) diff --git a/youtubeExtractor/src/androidTest/java/at/huber/youtubeExtractor/ExtractorTestCases.java b/youtubeExtractor/src/androidTest/java/at/huber/youtubeExtractor/ExtractorTestCases.java index 34161ae..152f51d 100644 --- a/youtubeExtractor/src/androidTest/java/at/huber/youtubeExtractor/ExtractorTestCases.java +++ b/youtubeExtractor/src/androidTest/java/at/huber/youtubeExtractor/ExtractorTestCases.java @@ -33,15 +33,14 @@ public class ExtractorTestCases { @Test public void testUsualVideo() throws Throwable { VideoMeta expMeta = new VideoMeta("YE7VzlLtp-4", "Big Buck Bunny", "Blender", - "UCSMOQeBJ2RAnuFungnQOxLg", 597, 0, false); + "UCSMOQeBJ2RAnuFungnQOxLg", 597, 0, false, ""); extractorTest("http://youtube.com/watch?v=YE7VzlLtp-4", expMeta); - extractorTestDashManifest("http://youtube.com/watch?v=YE7VzlLtp-4"); } @Test public void testUnembeddable() throws Throwable { VideoMeta expMeta = new VideoMeta("QH4VHl2uQ9o", "Match Chain Reaction Amazing Fire Art - real ghost rider", "BLACKHAND", - "UCl9nsRuGenStMDZfD95w85A", 331, 0, false); + "UCl9nsRuGenStMDZfD95w85A", 331, 0, false, ""); extractorTest("https://www.youtube.com/watch?v=QH4VHl2uQ9o", expMeta); extractorTestDashManifest("https://www.youtube.com/watch?v=QH4VHl2uQ9o"); } @@ -49,14 +48,14 @@ public void testUnembeddable() throws Throwable { @Test public void testEncipheredVideo() throws Throwable { VideoMeta expMeta = new VideoMeta("e8X3ACToii0", "Rise Against - Savior (Official Video)", "RiseAgainstVEVO", - "UChMKB2AHNpeuWhalpRYhUaw", 243, 0, false); + "UChMKB2AHNpeuWhalpRYhUaw", 243, 0, false, ""); extractorTest("https://www.youtube.com/watch?v=e8X3ACToii0", expMeta); } @Test public void testAgeRestrictVideo() throws Throwable { VideoMeta expMeta = new VideoMeta("61Ev-YvBw2c", "Test video for age-restriction", - "jpdemoA", "UC95NqtFsDZKlmzOJmZi_g6Q", 14, 0, false); + "jpdemoA", "UC95NqtFsDZKlmzOJmZi_g6Q", 14, 0, false, ""); extractorTest("http://www.youtube.com/watch?v=61Ev-YvBw2c", expMeta); // extractorTestDashManifest("http://www.youtube.com/watch?v=61Ev-YvBw2c"); } diff --git a/youtubeExtractor/src/main/java/at/huber/youtubeExtractor/VideoMeta.java b/youtubeExtractor/src/main/java/at/huber/youtubeExtractor/VideoMeta.java index ff0ca75..d0e1f39 100644 --- a/youtubeExtractor/src/main/java/at/huber/youtubeExtractor/VideoMeta.java +++ b/youtubeExtractor/src/main/java/at/huber/youtubeExtractor/VideoMeta.java @@ -6,6 +6,7 @@ public class VideoMeta { private String videoId; private String title; + private String shortDescript; private String author; private String channelId; @@ -15,7 +16,8 @@ public class VideoMeta { private boolean isLiveStream; - protected VideoMeta(String videoId, String title, String author, String channelId, long videoLength, long viewCount, boolean isLiveStream) { + protected VideoMeta(String videoId, String title, String author, String channelId, + long videoLength, long viewCount, boolean isLiveStream, String shortDescript) { this.videoId = videoId; this.title = title; this.author = author; @@ -23,9 +25,9 @@ protected VideoMeta(String videoId, String title, String author, String channelI this.videoLength = videoLength; this.viewCount = viewCount; this.isLiveStream = isLiveStream; + this.shortDescript = shortDescript; } - // 120 x 90 public String getThumbUrl() { return IMAGE_BASE_URL + videoId + "/default.jpg"; @@ -82,6 +84,10 @@ public long getViewCount() { return viewCount; } + public String getShortDescription() { + return shortDescript; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/youtubeExtractor/src/main/java/at/huber/youtubeExtractor/YouTubeExtractor.java b/youtubeExtractor/src/main/java/at/huber/youtubeExtractor/YouTubeExtractor.java index 1e781db..1045bc9 100644 --- a/youtubeExtractor/src/main/java/at/huber/youtubeExtractor/YouTubeExtractor.java +++ b/youtubeExtractor/src/main/java/at/huber/youtubeExtractor/YouTubeExtractor.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; -import java.io.UnsupportedEncodingException; import java.lang.ref.WeakReference; import java.net.HttpURLConnection; import java.net.URL; @@ -40,14 +39,12 @@ public abstract class YouTubeExtractor extends AsyncTask refContext; private String videoID; private VideoMeta videoMeta; private boolean includeWebM = true; private boolean useHttp = false; - private boolean parseDashManifest = false; private String cacheDirPath; private volatile String decipheredSignature; @@ -60,31 +57,27 @@ public abstract class YouTubeExtractor extends AsyncTask((.+?)jsbin\\\\/(player(_ias)?-(.+?).js)(.+?))"); private static final Pattern patVariableFunction = Pattern.compile("([{; =])([a-zA-Z$][a-zA-Z0-9$]{0,2})\\.([a-zA-Z$][a-zA-Z0-9$]{0,2})\\("); private static final Pattern patFunction = Pattern.compile("([{; =])([a-zA-Z$_][a-zA-Z0-9$]{0,2})\\("); @@ -121,6 +114,8 @@ public abstract class YouTubeExtractor extends AsyncTask ytFiles) { * Start the extraction. * * @param youtubeLink the youtube page link or video id - * @param parseDashManifest true if the dash manifest should be downloaded and parsed + * @param parseDashManifest not supported anymore * @param includeWebM true if WebM streams should be extracted */ public void extract(String youtubeLink, boolean parseDashManifest, boolean includeWebM) { - this.parseDashManifest = parseDashManifest; this.includeWebM = includeWebM; this.execute(youtubeLink); } @@ -215,7 +209,6 @@ private SparseArray getStreamUrls() throws IOException, InterruptedExcep ytInfoUrl += "www.youtube.com/get_video_info?video_id=" + videoID + "&eurl=" + URLEncoder.encode("https://youtube.googleapis.com/v/" + videoID, "UTF-8"); - String dashMpdUrl = null; String streamMap; BufferedReader reader = null; URL getUrl = new URL(ytInfoUrl); @@ -233,10 +226,12 @@ private SparseArray getStreamUrls() throws IOException, InterruptedExcep urlConnection.disconnect(); } Matcher mat; - String curJsFileName = null; - String[] streams; + String curJsFileName; SparseArray encSignatures = null; + streamMap = URLDecoder.decode(streamMap, "UTF-8"); + streamMap = streamMap.replace("\\u0026", "&"); + parseVideoMeta(streamMap); if(videoMeta.isLiveStream()){ @@ -262,8 +257,7 @@ private SparseArray getStreamUrls() throws IOException, InterruptedExcep } } } finally { - if (reader != null) - reader.close(); + reader.close(); urlConnection.disconnect(); } @@ -280,13 +274,10 @@ private SparseArray getStreamUrls() throws IOException, InterruptedExcep // "use_cipher_signature" disappeared, we check whether at least one ciphered signature // exists int the stream_map. boolean sigEnc = true, statusFail = false; - if(streamMap != null && streamMap.contains(STREAM_MAP_STRING)){ - - if(!patIsSigEnc2.matcher(streamMap).find() && !patIsSigEnc.matcher(streamMap).find()) { - sigEnc = false; - - if (!patStatusOk.matcher(streamMap).find()) - statusFail = true; + if(!patCipher.matcher(streamMap).find()) { + sigEnc = false; + if (!patStatusOk.matcher(streamMap).find()) { + statusFail = true; } } @@ -309,14 +300,14 @@ private SparseArray getStreamUrls() throws IOException, InterruptedExcep String line; while ((line = reader.readLine()) != null) { // Log.d("line", line); - if (line.contains(STREAM_MAP_STRING)) { - streamMap = line.replace("\\u0026", "&"); + mat = patYtPlayer.matcher(line); + if (mat.find()) { + streamMap = line.replace("\\\"", "\""); break; } } } finally { - if (reader != null) - reader.close(); + reader.close(); urlConnection.disconnect(); } encSignatures = new SparseArray<>(); @@ -332,82 +323,68 @@ private SparseArray getStreamUrls() throws IOException, InterruptedExcep } decipherJsFileName = curJsFileName; } + } - if (parseDashManifest) { - mat = patDashManifest2.matcher(streamMap); - if (mat.find()) { - dashMpdUrl = mat.group(1).replace("\\/", "/"); - mat = patDashManifestEncSig.matcher(dashMpdUrl); - if (mat.find()) { - encSignatures.append(0, mat.group(1)); + SparseArray ytFiles = new SparseArray<>(); + + if (sigEnc) { + mat = patCipher.matcher(streamMap); + } + else { + mat = patUrl.matcher(streamMap); + } + + while (mat.find()) { + String sig = null; + String url; + if (sigEnc) { + String cipher = mat.group(1); + Log.d(LOG_TAG, cipher); + Matcher mat2 = patCipherUrl.matcher(cipher); + if (mat2.find()) { + url = URLDecoder.decode(mat2.group(1), "UTF-8"); + mat2 = patEncSig.matcher(cipher); + if (mat2.find()) { + sig = URLDecoder.decode(mat2.group(1), "UTF-8"); } else { - dashMpdUrl = null; + continue; } + } else { + continue; } + } else { + url = mat.group(1); } - } else { - if (parseDashManifest) { - mat = patDashManifest1.matcher(streamMap); - if (mat.find()) { - dashMpdUrl = URLDecoder.decode(mat.group(1), "UTF-8"); - } - } - streamMap = URLDecoder.decode(streamMap, "UTF-8"); - } - boolean oldSignature = false; - streams = streamMap.split(",|"+STREAM_MAP_STRING+"|&adaptive_fmts="); - SparseArray ytFiles = new SparseArray<>(); - for (String encStream : streams) { - encStream = encStream + ","; - if (!encStream.contains("itag%3D")) { + Matcher mat2 = patItag.matcher(url); + if (!mat2.find()) continue; - } - String stream; - stream = URLDecoder.decode(encStream, "UTF-8"); - mat = patItag.matcher(stream); - int itag; - if (mat.find()) { - itag = Integer.parseInt(mat.group(1)); + int itag = Integer.parseInt(mat2.group(1)); + + if (FORMAT_MAP.get(itag) == null) { if (LOGGING) - Log.d(LOG_TAG, "Itag found:" + itag); - if (FORMAT_MAP.get(itag) == null) { - if (LOGGING) - Log.d(LOG_TAG, "Itag not in list:" + itag); - continue; - } else if (!includeWebM && FORMAT_MAP.get(itag).getExt().equals("webm")) { - continue; - } - } else { + Log.d(LOG_TAG, "Itag not in list:" + itag); + continue; + } else if (!includeWebM && FORMAT_MAP.get(itag).getExt().equals("webm")) { continue; } - if (curJsFileName != null) { - mat = patEncSig2.matcher(stream); - if (mat.find()) { - encSignatures.append(itag, URLDecoder.decode(mat.group(2), "UTF-8")); - } else { - mat = patEncSig.matcher(stream); - if (mat.find()) - { - encSignatures.append(itag, mat.group(1)); - oldSignature = true; - } - } - } - mat = patUrl.matcher(encStream); - String url = null; - if (mat.find()) { - url = mat.group(1); - } + // Unsupported + if (url.contains("&source=yt_otf&")) + continue; - if (url != null) { - Format format = FORMAT_MAP.get(itag); - String finalUrl = URLDecoder.decode(url, "UTF-8"); - YtFile newVideo = new YtFile(format, finalUrl); - ytFiles.put(itag, newVideo); + if (LOGGING) + Log.d(LOG_TAG, "Itag found:" + itag); + + if (sig != null) { + encSignatures.append(itag, sig); } + + Format format = FORMAT_MAP.get(itag); + Log.d(LOG_TAG, url); + YtFile newVideo = new YtFile(format, url); + ytFiles.put(itag, newVideo); } if (encSignatures != null) { @@ -431,28 +408,10 @@ private SparseArray getStreamUrls() throws IOException, InterruptedExcep String[] sigs = signature.split("\n"); for (int i = 0; i < encSignatures.size() && i < sigs.length; i++) { int key = encSignatures.keyAt(i); - if (key == 0) { - dashMpdUrl = dashMpdUrl.replace("/s/" + encSignatures.get(key), "/signature/" + sigs[i]); - } else { - String url = ytFiles.get(key).getUrl(); - url += (oldSignature ? "&signature=" : "&sig=") + sigs[i]; - YtFile newFile = new YtFile(FORMAT_MAP.get(key), url); - ytFiles.put(key, newFile); - } - } - } - } - - if (parseDashManifest && dashMpdUrl != null) { - for (int i = 0; i < DASH_PARSE_RETRIES; i++) { - try { - // It sometimes fails to connect for no apparent reason. We just retry. - parseDashManifest(dashMpdUrl, ytFiles); - break; - } catch (IOException io) { - Thread.sleep(5); - if (LOGGING) - Log.d(LOG_TAG, "Failed to parse dash manifest " + (i + 1)); + String url = ytFiles.get(key).getUrl(); + url += "&sig=" + sigs[i]; + YtFile newFile = new YtFile(FORMAT_MAP.get(key), url); + ytFiles.put(key, newFile); } } } @@ -584,52 +543,13 @@ else if (javascriptFile.charAt(i) == '}') return true; } - private void parseDashManifest(String dashMpdUrl, SparseArray ytFiles) throws IOException { - Pattern patBaseUrl = Pattern.compile("<\\s*BaseURL(.*?)>(.+?)<\\s*/BaseURL\\s*>"); - Pattern patDashItag = Pattern.compile("itag/([0-9]+?)/"); - String dashManifest; - BufferedReader reader = null; - URL getUrl = new URL(dashMpdUrl); - HttpURLConnection urlConnection = (HttpURLConnection) getUrl.openConnection(); - urlConnection.setRequestProperty("User-Agent", USER_AGENT); - try { - reader = new BufferedReader(new InputStreamReader(urlConnection.getInputStream())); - reader.readLine(); - dashManifest = reader.readLine(); - - } finally { - if (reader != null) - reader.close(); - urlConnection.disconnect(); - } - if (dashManifest == null) - return; - Matcher mat = patBaseUrl.matcher(dashManifest); - while (mat.find()) { - int itag; - String url = mat.group(2); - Matcher mat2 = patDashItag.matcher(url); - if (mat2.find()) { - itag = Integer.parseInt(mat2.group(1)); - if (FORMAT_MAP.get(itag) == null) - continue; - if (!includeWebM && FORMAT_MAP.get(itag).getExt().equals("webm")) - continue; - } else { - continue; - } - YtFile yf = new YtFile(FORMAT_MAP.get(itag), url); - ytFiles.append(itag, yf); - } - } - - private void parseVideoMeta(String getVideoInfo) throws UnsupportedEncodingException { + private void parseVideoMeta(String getVideoInfo) { boolean isLiveStream = false; - String title = null, author = null, channelId = null; + String title = null, author = null, channelId = null, shortDescript = null; long viewCount = 0, length = 0; Matcher mat = patTitle.matcher(getVideoInfo); if (mat.find()) { - title = URLDecoder.decode(mat.group(1), "UTF-8"); + title = mat.group(1); } mat = patHlsvp.matcher(getVideoInfo); @@ -638,12 +558,17 @@ private void parseVideoMeta(String getVideoInfo) throws UnsupportedEncodingExcep mat = patAuthor.matcher(getVideoInfo); if (mat.find()) { - author = URLDecoder.decode(mat.group(1), "UTF-8"); + author = mat.group(1); } mat = patChannelId.matcher(getVideoInfo); if (mat.find()) { channelId = mat.group(1); } + mat = patShortDescript.matcher(getVideoInfo); + if (mat.find()) { + shortDescript = mat.group(1); + } + mat = patLength.matcher(getVideoInfo); if (mat.find()) { length = Long.parseLong(mat.group(1)); @@ -652,8 +577,7 @@ private void parseVideoMeta(String getVideoInfo) throws UnsupportedEncodingExcep if (mat.find()) { viewCount = Long.parseLong(mat.group(1)); } - videoMeta = new VideoMeta(videoID, title, author, channelId, length, viewCount, isLiveStream); - + videoMeta = new VideoMeta(videoID, title, author, channelId, length, viewCount, isLiveStream, shortDescript); } private void readDecipherFunctFromCache() { @@ -681,10 +605,9 @@ private void readDecipherFunctFromCache() { } /** - * Parse the dash manifest for different dash streams and high quality audio. Default: false + * not supported anymore */ public void setParseDashManifest(boolean parseDashManifest) { - this.parseDashManifest = parseDashManifest; } @@ -779,5 +702,4 @@ public void onError(String errorMessage) { } }); } - }