diff options
Diffstat (limited to '')
-rw-r--r-- | yt_dlp/extractor/patreon.py | 43 |
1 files changed, 26 insertions, 17 deletions
diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 6c441ff..7d6e843 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -2,6 +2,7 @@ import itertools import urllib.parse from .common import InfoExtractor +from .sproutvideo import VidsIoIE from .vimeo import VimeoIE from ..networking.exceptions import HTTPError from ..utils import ( @@ -12,6 +13,7 @@ from ..utils import ( int_or_none, mimetype2ext, parse_iso8601, + smuggle_url, str_or_none, traverse_obj, url_or_none, @@ -33,7 +35,7 @@ class PatreonBaseIE(InfoExtractor): try: return self._download_json( f'https://www.patreon.com/api/{ep}', - item_id, note='Downloading API JSON' if not note else note, + item_id, note=note if note else 'Downloading API JSON', query=query, fatal=fatal, headers=headers) except ExtractorError as e: if not isinstance(e.cause, HTTPError) or mimetype2ext(e.cause.response.headers.get('Content-Type')) != 'json': @@ -113,7 +115,7 @@ class PatreonIE(PatreonBaseIE): 'params': { 'noplaylist': True, 'skip_download': True, - } + }, }, { 'url': 'https://www.patreon.com/posts/episode-166-of-743933', 'only_matching': True, @@ -133,7 +135,7 @@ class PatreonIE(PatreonBaseIE): 'description': 'md5:557a409bd79d3898689419094934ba79', 'uploader_id': '14936315', }, - 'skip': 'Patron-only content' + 'skip': 'Patron-only content', }, { # m3u8 video (https://github.com/yt-dlp/yt-dlp/issues/2277) 'url': 'https://www.patreon.com/posts/video-sketchbook-32452882', @@ -154,7 +156,7 @@ class PatreonIE(PatreonBaseIE): 'channel_id': '1641751', 'channel_url': 'https://www.patreon.com/loish', 'channel_follower_count': int, - } + }, }, { # bad videos under media (if media is included). Real one is under post_file 'url': 'https://www.patreon.com/posts/premium-access-70282931', @@ -305,22 +307,28 @@ class PatreonIE(PatreonBaseIE): 'channel_follower_count': ('attributes', 'patron_count', {int_or_none}), })) + # all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo + headers = {'referer': 'https://patreon.com/'} + # handle Vimeo embeds if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo': v_url = urllib.parse.unquote(self._html_search_regex( r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '') if url_or_none(v_url) and self._request_webpage( - v_url, video_id, 'Checking Vimeo embed URL', - headers={'Referer': 'https://patreon.com/'}, - fatal=False, errnote=False): + v_url, video_id, 'Checking Vimeo embed URL', headers=headers, + fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection entries.append(self.url_result( VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'), VimeoIE, url_transparent=True)) embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none})) - if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False): - entries.append(self.url_result(embed_url)) + if embed_url and (urlh := self._request_webpage( + embed_url, video_id, 'Checking embed URL', headers=headers, + fatal=False, errnote=False, expected_status=403)): + # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie + if urlh.status != 403 or VidsIoIE.suitable(embed_url): + entries.append(self.url_result(smuggle_url(embed_url, headers))) post_file = traverse_obj(attributes, ('post_file', {dict})) if post_file: @@ -378,7 +386,7 @@ class PatreonIE(PatreonBaseIE): params.update({'page[cursor]': cursor} if cursor else {}) response = self._call_api( - f'posts/{post_id}/comments', post_id, query=params, note='Downloading comments page %d' % page) + f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}') cursor = None for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)): @@ -446,18 +454,18 @@ class PatreonCampaignIE(PatreonBaseIE): 'uploader_id': '37306634', 'thumbnail': r're:^https?://.*$', }, - 'playlist_mincount': 71 + 'playlist_mincount': 71, }, { 'url': 'https://www.patreon.com/dissonancepod/posts', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.patreon.com/m/5932659', - 'only_matching': True + 'only_matching': True, }] @classmethod def suitable(cls, url): - return False if PatreonIE.suitable(url) else super(PatreonCampaignIE, cls).suitable(url) + return False if PatreonIE.suitable(url) else super().suitable(url) def _entries(self, campaign_id): cursor = None @@ -472,7 +480,7 @@ class PatreonCampaignIE(PatreonBaseIE): for page in itertools.count(1): params.update({'page[cursor]': cursor} if cursor else {}) - posts_json = self._call_api('posts', campaign_id, query=params, note='Downloading posts page %d' % page) + posts_json = self._call_api('posts', campaign_id, query=params, note=f'Downloading posts page {page}') cursor = traverse_obj(posts_json, ('meta', 'pagination', 'cursors', 'next')) for post_url in traverse_obj(posts_json, ('data', ..., 'attributes', 'patreon_url')): @@ -486,13 +494,14 @@ class PatreonCampaignIE(PatreonBaseIE): campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity') if campaign_id is None: webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT}) - campaign_id = self._search_regex(r'https://www.patreon.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID') + campaign_id = self._search_nextjs_data( + webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id'] params = { 'json-api-use-default-includes': 'false', 'fields[user]': 'full_name,url', 'fields[campaign]': 'name,summary,url,patron_count,creation_count,is_nsfw,avatar_photo_url', - 'include': 'creator' + 'include': 'creator', } campaign_response = self._call_api( |