summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp/extractor/reddit.py
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 03:10:23 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-30 03:10:23 +0000
commit96011c9a0b8a4f10e6e06f76acc380d6a802bbee (patch)
tree30ded50e296e5d936800d19ada594982f10111d6 /yt_dlp/extractor/reddit.py
parentAdding debian version 2024.04.09-1. (diff)
downloadyt-dlp-96011c9a0b8a4f10e6e06f76acc380d6a802bbee.tar.xz
yt-dlp-96011c9a0b8a4f10e6e06f76acc380d6a802bbee.zip
Merging upstream version 2024.05.26.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'yt_dlp/extractor/reddit.py')
-rw-r--r--  yt_dlp/extractor/reddit.py | 63
1 file changed, 60 insertions, 3 deletions
diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
index 62f669f..bc3e5f7 100644
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -5,11 +5,13 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ parse_qs,
traverse_obj,
try_get,
unescapeHTML,
- urlencode_postdata,
+ update_url_query,
url_or_none,
+ urlencode_postdata,
)
@@ -76,7 +78,7 @@ class RedditIE(InfoExtractor):
'like_count': int,
'dislike_count': int,
'comment_count': int,
- 'age_limit': 0,
+ 'age_limit': 18,
'channel_id': 'u_creepyt0es',
},
'params': {
@@ -151,6 +153,51 @@ class RedditIE(InfoExtractor):
},
'skip': 'Requires account that has opted-in to the GenZedong subreddit',
}, {
+ # subtitles in HLS manifest
+ 'url': 'https://www.reddit.com/r/Unexpected/comments/1cl9h0u/the_insurance_claim_will_be_interesting/',
+ 'info_dict': {
+ 'id': 'a2mdj5d57qyc1',
+ 'ext': 'mp4',
+ 'display_id': '1cl9h0u',
+ 'title': 'The insurance claim will be interesting',
+ 'uploader': 'darrenpauli',
+ 'channel_id': 'Unexpected',
+ 'duration': 53,
+ 'upload_date': '20240506',
+ 'timestamp': 1714966382,
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ 'subtitles': {'en': 'mincount:1'},
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # subtitles from caption-url
+ 'url': 'https://www.reddit.com/r/soccer/comments/1cxwzso/tottenham_1_0_newcastle_united_james_maddison_31/',
+ 'info_dict': {
+ 'id': 'xbmj4t3igy1d1',
+ 'ext': 'mp4',
+ 'display_id': '1cxwzso',
+ 'title': 'Tottenham [1] - 0 Newcastle United - James Maddison 31\'',
+ 'uploader': 'Woodstovia',
+ 'channel_id': 'soccer',
+ 'duration': 30,
+ 'upload_date': '20240522',
+ 'timestamp': 1716373798,
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ 'subtitles': {'en': 'mincount:1'},
+ },
+ 'params': {
+ 'skip_download': True,
+ 'writesubtitles': True,
+ },
+ }, {
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
'only_matching': True,
}, {
@@ -197,6 +244,12 @@ class RedditIE(InfoExtractor):
elif not traverse_obj(login, ('json', 'data', 'cookie', {str})):
raise ExtractorError('Unable to login, no cookie was returned')
+ def _get_subtitles(self, video_id):
+ # Fallback if there were no subtitles provided by DASH or HLS manifests
+ caption_url = f'https://v.redd.it/{video_id}/wh_ben_en.vtt'
+ if self._is_valid_url(caption_url, video_id, item='subtitles'):
+ return {'en': [{'url': caption_url}]}
+
def _real_extract(self, url):
host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
@@ -307,6 +360,10 @@ class RedditIE(InfoExtractor):
dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd'
hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8'
+ qs = traverse_obj(parse_qs(hls_playlist_url), {
+ 'f': ('f', 0, {lambda x: ','.join([x, 'subsAll']) if x else 'hd,subsAll'}),
+ })
+ hls_playlist_url = update_url_query(hls_playlist_url, qs)
formats = [{
'url': unescapeHTML(reddit_video['fallback_url']),
@@ -332,7 +389,7 @@ class RedditIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'formats': formats,
- 'subtitles': subtitles,
+ 'subtitles': subtitles or self.extract_subtitles(video_id),
'duration': int_or_none(reddit_video.get('duration')),
}