summaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/patreon.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--yt_dlp/extractor/patreon.py43
1 files changed, 26 insertions, 17 deletions
diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index 6c441ff..7d6e843 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -2,6 +2,7 @@ import itertools
import urllib.parse
from .common import InfoExtractor
+from .sproutvideo import VidsIoIE
from .vimeo import VimeoIE
from ..networking.exceptions import HTTPError
from ..utils import (
@@ -12,6 +13,7 @@ from ..utils import (
int_or_none,
mimetype2ext,
parse_iso8601,
+ smuggle_url,
str_or_none,
traverse_obj,
url_or_none,
@@ -33,7 +35,7 @@ class PatreonBaseIE(InfoExtractor):
try:
return self._download_json(
f'https://www.patreon.com/api/{ep}',
- item_id, note='Downloading API JSON' if not note else note,
+ item_id, note=note if note else 'Downloading API JSON',
query=query, fatal=fatal, headers=headers)
except ExtractorError as e:
if not isinstance(e.cause, HTTPError) or mimetype2ext(e.cause.response.headers.get('Content-Type')) != 'json':
@@ -113,7 +115,7 @@ class PatreonIE(PatreonBaseIE):
'params': {
'noplaylist': True,
'skip_download': True,
- }
+ },
}, {
'url': 'https://www.patreon.com/posts/episode-166-of-743933',
'only_matching': True,
@@ -133,7 +135,7 @@ class PatreonIE(PatreonBaseIE):
'description': 'md5:557a409bd79d3898689419094934ba79',
'uploader_id': '14936315',
},
- 'skip': 'Patron-only content'
+ 'skip': 'Patron-only content',
}, {
# m3u8 video (https://github.com/yt-dlp/yt-dlp/issues/2277)
'url': 'https://www.patreon.com/posts/video-sketchbook-32452882',
@@ -154,7 +156,7 @@ class PatreonIE(PatreonBaseIE):
'channel_id': '1641751',
'channel_url': 'https://www.patreon.com/loish',
'channel_follower_count': int,
- }
+ },
}, {
# bad videos under media (if media is included). Real one is under post_file
'url': 'https://www.patreon.com/posts/premium-access-70282931',
@@ -305,22 +307,28 @@ class PatreonIE(PatreonBaseIE):
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
}))
+ # all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
+ headers = {'referer': 'https://patreon.com/'}
+
# handle Vimeo embeds
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
v_url = urllib.parse.unquote(self._html_search_regex(
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
if url_or_none(v_url) and self._request_webpage(
- v_url, video_id, 'Checking Vimeo embed URL',
- headers={'Referer': 'https://patreon.com/'},
- fatal=False, errnote=False):
+ v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
+ fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
entries.append(self.url_result(
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
VimeoIE, url_transparent=True))
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
- if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
- entries.append(self.url_result(embed_url))
+ if embed_url and (urlh := self._request_webpage(
+ embed_url, video_id, 'Checking embed URL', headers=headers,
+ fatal=False, errnote=False, expected_status=403)):
+ # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
+ if urlh.status != 403 or VidsIoIE.suitable(embed_url):
+ entries.append(self.url_result(smuggle_url(embed_url, headers)))
post_file = traverse_obj(attributes, ('post_file', {dict}))
if post_file:
@@ -378,7 +386,7 @@ class PatreonIE(PatreonBaseIE):
params.update({'page[cursor]': cursor} if cursor else {})
response = self._call_api(
- f'posts/{post_id}/comments', post_id, query=params, note='Downloading comments page %d' % page)
+ f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}')
cursor = None
for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)):
@@ -446,18 +454,18 @@ class PatreonCampaignIE(PatreonBaseIE):
'uploader_id': '37306634',
'thumbnail': r're:^https?://.*$',
},
- 'playlist_mincount': 71
+ 'playlist_mincount': 71,
}, {
'url': 'https://www.patreon.com/dissonancepod/posts',
- 'only_matching': True
+ 'only_matching': True,
}, {
'url': 'https://www.patreon.com/m/5932659',
- 'only_matching': True
+ 'only_matching': True,
}]
@classmethod
def suitable(cls, url):
- return False if PatreonIE.suitable(url) else super(PatreonCampaignIE, cls).suitable(url)
+ return False if PatreonIE.suitable(url) else super().suitable(url)
def _entries(self, campaign_id):
cursor = None
@@ -472,7 +480,7 @@ class PatreonCampaignIE(PatreonBaseIE):
for page in itertools.count(1):
params.update({'page[cursor]': cursor} if cursor else {})
- posts_json = self._call_api('posts', campaign_id, query=params, note='Downloading posts page %d' % page)
+ posts_json = self._call_api('posts', campaign_id, query=params, note=f'Downloading posts page {page}')
cursor = traverse_obj(posts_json, ('meta', 'pagination', 'cursors', 'next'))
for post_url in traverse_obj(posts_json, ('data', ..., 'attributes', 'patreon_url')):
@@ -486,13 +494,14 @@ class PatreonCampaignIE(PatreonBaseIE):
campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
if campaign_id is None:
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT})
- campaign_id = self._search_regex(r'https://www.patreon.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID')
+ campaign_id = self._search_nextjs_data(
+ webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']
params = {
'json-api-use-default-includes': 'false',
'fields[user]': 'full_name,url',
'fields[campaign]': 'name,summary,url,patron_count,creation_count,is_nsfw,avatar_photo_url',
- 'include': 'creator'
+ 'include': 'creator',
}
campaign_response = self._call_api(