From 784c0293cc90afa3406777012801e8ee21dfbc9e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 5 Aug 2024 11:07:17 +0200 Subject: Merging upstream version 2024.07.16. Signed-off-by: Daniel Baumann --- yt_dlp/extractor/_extractors.py | 5 - yt_dlp/extractor/adn.py | 23 +- yt_dlp/extractor/afreecatv.py | 22 +- yt_dlp/extractor/box.py | 20 +- yt_dlp/extractor/digitalconcerthall.py | 42 +++- yt_dlp/extractor/discovery.py | 115 --------- yt_dlp/extractor/discoverygo.py | 171 ------------- yt_dlp/extractor/dplay.py | 438 +++++++++++++++++++++------------ yt_dlp/extractor/epidemicsound.py | 23 +- yt_dlp/extractor/generic.py | 5 + yt_dlp/extractor/picarto.py | 16 +- yt_dlp/extractor/soundcloud.py | 4 +- yt_dlp/extractor/tiktok.py | 8 +- yt_dlp/extractor/tv5mondeplus.py | 7 +- yt_dlp/extractor/youtube.py | 41 ++- 15 files changed, 430 insertions(+), 510 deletions(-) delete mode 100644 yt_dlp/extractor/discovery.py delete mode 100644 yt_dlp/extractor/discoverygo.py (limited to 'yt_dlp/extractor') diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index fc917ff..d2140bc 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -504,7 +504,6 @@ from .dhm import DHMIE from .digitalconcerthall import DigitalConcertHallIE from .digiteka import DigitekaIE from .discogs import DiscogsReleasePlaylistIE -from .discovery import DiscoveryIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dlf import ( @@ -532,16 +531,12 @@ from .dplay import ( DiscoveryPlusIndiaShowIE, DiscoveryPlusItalyIE, DiscoveryPlusItalyShowIE, - DIYNetworkIE, DPlayIE, FoodNetworkIE, - GlobalCyclingNetworkPlusIE, GoDiscoveryIE, HGTVDeIE, HGTVUsaIE, InvestigationDiscoveryIE, - MotorTrendIE, - MotorTrendOnDemandIE, ScienceChannelIE, TravelChannelIE, ) diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 7be990b..3370717 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -16,6 +16,7 @@ from 
..utils import ( float_or_none, int_or_none, intlist_to_bytes, + join_nonempty, long_to_bytes, parse_iso8601, pkcs1pad, @@ -48,9 +49,9 @@ class ADNBaseIE(InfoExtractor): class ADNIE(ADNBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?Pfr|de)/video/[^/?#]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?Pde)/)?video/[^/?#]+/(?P\d+)' _TESTS = [{ - 'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir', + 'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir', 'md5': '1c9ef066ceb302c86f80c2b371615261', 'info_dict': { 'id': '9841', @@ -70,10 +71,10 @@ class ADNIE(ADNBaseIE): }, 'skip': 'Only available in French and German speaking Europe', }, { - 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', + 'url': 'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', 'only_matching': True, }, { - 'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1', + 'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1', 'md5': '5c5651bf5791fa6fcd7906012b9d94e8', 'info_dict': { 'id': '23550', @@ -217,7 +218,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' links_data = self._download_json( links_url, video_id, 'Downloading links JSON metadata', headers={ 'X-Player-Token': authorization, - 'X-Target-Distribution': lang, + 'X-Target-Distribution': lang or 'fr', **self._HEADERS, }, query={ 'freeWithAds': 'true', @@ -298,9 +299,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' class ADNSeasonIE(ADNBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?Pfr|de)/video/(?P[^/?#]+)/?(?:$|[#?])' + _VALID_URL = 
r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?Pde)/)?video/(?P[^/?#]+)/?(?:$|[#?])' _TESTS = [{ - 'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new', + 'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new', 'playlist_count': 12, 'info_dict': { 'id': '911', @@ -318,7 +319,7 @@ class ADNSeasonIE(ADNBaseIE): episodes = self._download_json( f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug, 'Downloading episode list', headers={ - 'X-Target-Distribution': lang, + 'X-Target-Distribution': lang or 'fr', **self._HEADERS, }, query={ 'order': 'asc', @@ -327,8 +328,8 @@ class ADNSeasonIE(ADNBaseIE): def entries(): for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})): - yield self.url_result( - f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}', - ADNIE, episode_id) + yield self.url_result(join_nonempty( + 'https://animationdigitalnetwork.com', lang, 'video', + video_show_slug, episode_id, delim='/'), ADNIE, episode_id) return self.playlist_result(entries(), show_id, show.get('title')) diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index f51b5a6..815d205 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -1,6 +1,7 @@ import functools from .common import InfoExtractor +from ..networking import Request from ..utils import ( ExtractorError, OnDemandPagedList, @@ -58,6 +59,13 @@ class AfreecaTVBaseIE(InfoExtractor): f'Unable to login: {self.IE_NAME} said: {error}', expected=True) + def _call_api(self, endpoint, display_id, data=None, headers=None, query=None): + return self._download_json(Request( + f'https://api.m.afreecatv.com/{endpoint}', + data=data, headers=headers, query=query, + extensions={'legacy_ssl': True}), display_id, + 'Downloading API JSON', 'Unable to download API JSON') + class AfreecaTVIE(AfreecaTVBaseIE): IE_NAME = 'afreecatv' @@ -184,12 +192,12 @@ class AfreecaTVIE(AfreecaTVBaseIE): def 
_real_extract(self, url): video_id = self._match_id(url) - data = self._download_json( - 'https://api.m.afreecatv.com/station/video/a/view', video_id, - headers={'Referer': url}, data=urlencode_postdata({ + data = self._call_api( + 'station/video/a/view', video_id, headers={'Referer': url}, + data=urlencode_postdata({ 'nTitleNo': video_id, 'nApiLevel': 10, - }), impersonate=True)['data'] + }))['data'] error_code = traverse_obj(data, ('code', {int})) if error_code == -6221: @@ -267,9 +275,9 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - data = self._download_json( - 'https://api.m.afreecatv.com/catchstory/a/view', video_id, headers={'Referer': url}, - query={'aStoryListIdx': '', 'nStoryIdx': video_id}, impersonate=True) + data = self._call_api( + 'catchstory/a/view', video_id, headers={'Referer': url}, + query={'aStoryListIdx': '', 'nStoryIdx': video_id}) return self.playlist_result(self._entries(data), video_id) diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py index 3547ad9..f06339f 100644 --- a/yt_dlp/extractor/box.py +++ b/yt_dlp/extractor/box.py @@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj class BoxIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P[^/?#]+)(?:/file/(?P\d+))?' + _VALID_URL = r'https?://(?:[^.]+\.)?(?Papp|ent)\.box\.com/s/(?P[^/?#]+)(?:/file/(?P\d+))?' 
_TESTS = [{ 'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538', 'md5': '1f81b2fd3960f38a40a3b8823e5fcd43', @@ -38,10 +38,22 @@ class BoxIE(InfoExtractor): 'uploader_id': '239068974', }, 'params': {'skip_download': 'dash fragment too small'}, + }, { + 'url': 'https://thejacksonlaboratory.ent.box.com/s/2x09dm6vcg6y28o0oox1so4l0t8wzt6l/file/1536173056065', + 'info_dict': { + 'id': '1536173056065', + 'ext': 'mp4', + 'uploader_id': '18523128264', + 'uploader': 'Lexi Hennigan', + 'title': 'iPSC Symposium recording part 1.mp4', + 'timestamp': 1716228343, + 'upload_date': '20240520', + }, + 'params': {'skip_download': 'dash fragment too small'}, }] def _real_extract(self, url): - shared_name, file_id = self._match_valid_url(url).groups() + shared_name, file_id, service = self._match_valid_url(url).group('shared_name', 'id', 'service') webpage = self._download_webpage(url, file_id or shared_name) if not file_id: @@ -57,14 +69,14 @@ class BoxIE(InfoExtractor): request_token = self._search_json( r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken'] access_token = self._download_json( - 'https://app.box.com/app-api/enduserapp/elements/tokens', file_id, + f'https://{service}.box.com/app-api/enduserapp/elements/tokens', file_id, 'Downloading token JSON metadata', data=json.dumps({'fileIDs': [file_id]}).encode(), headers={ 'Content-Type': 'application/json', 'X-Request-Token': request_token, 'X-Box-EndUser-API': 'sharedName=' + shared_name, })[file_id]['read'] - shared_link = 'https://app.box.com/s/' + shared_name + shared_link = f'https://{service}.box.com/s/{shared_name}' f = self._download_json( 'https://api.box.com/2.0/files/' + file_id, file_id, 'Downloading file JSON metadata', headers={ diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index 8b4d5c0..edb6fa9 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -1,6 +1,8 @@ from 
.common import InfoExtractor +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, + parse_codecs, try_get, url_or_none, urlencode_postdata, @@ -12,6 +14,7 @@ class DigitalConcertHallIE(InfoExtractor): IE_DESC = 'DigitalConcertHall extractor' _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P[a-z]+)/(?Pfilm|concert|work)/(?P[0-9]+)-?(?P[0-9]+)?' _OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token' + _USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15' _ACCESS_TOKEN = None _NETRC_MACHINE = 'digitalconcerthall' _TESTS = [{ @@ -68,33 +71,42 @@ class DigitalConcertHallIE(InfoExtractor): }] def _perform_login(self, username, password): - token_response = self._download_json( + login_token = self._download_json( self._OAUTH_URL, None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({ 'affiliate': 'none', 'grant_type': 'device', 'device_vendor': 'unknown', + # device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio + 'device_model': 'unknown' if self._configuration_arg('prefer_combined_hls') else 'Safari', 'app_id': 'dch.webapp', - 'app_version': '1.0.0', + 'app_distributor': 'berlinphil', + 'app_version': '1.84.0', 'client_secret': '2ySLN+2Fwb', }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - }) - self._ACCESS_TOKEN = token_response['access_token'] + 'Accept': 'application/json', + 'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8', + 'User-Agent': self._USER_AGENT, + })['access_token'] try: - self._download_json( + login_response = self._download_json( self._OAUTH_URL, None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({ 'grant_type': 'password', 'username': username, 'password': password, }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', + 'Accept': 'application/json', + 'Content-Type': 
'application/x-www-form-urlencoded;charset=UTF-8', 'Referer': 'https://www.digitalconcerthall.com', - 'Authorization': f'Bearer {self._ACCESS_TOKEN}', + 'Authorization': f'Bearer {login_token}', + 'User-Agent': self._USER_AGENT, }) - except ExtractorError: - self.raise_login_required(msg='Login info incorrect') + except ExtractorError as error: + if isinstance(error.cause, HTTPError) and error.cause.status == 401: + raise ExtractorError('Invalid username or password', expected=True) + raise + self._ACCESS_TOKEN = login_response['access_token'] def _real_initialize(self): if not self._ACCESS_TOKEN: @@ -108,11 +120,15 @@ class DigitalConcertHallIE(InfoExtractor): 'Accept': 'application/json', 'Authorization': f'Bearer {self._ACCESS_TOKEN}', 'Accept-Language': language, + 'User-Agent': self._USER_AGENT, }) formats = [] for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): - formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False)) + formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + for fmt in formats: + if fmt.get('format_note') and fmt.get('vcodec') == 'none': + fmt.update(parse_codecs(fmt['format_note'])) yield { 'id': video_id, @@ -140,13 +156,15 @@ class DigitalConcertHallIE(InfoExtractor): f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={ 'Accept': 'application/json', 'Accept-Language': language, + 'User-Agent': self._USER_AGENT, + 'Authorization': f'Bearer {self._ACCESS_TOKEN}', }) - album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...)) if type_ == 'work': videos = [videos[int(part) - 1]] + album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name', {str})) thumbnail = traverse_obj(vid_info, ( 'image', ..., {self._proto_relative_url}, {url_or_none}, {lambda x: x.format(width=0, height=0)}, 
any)) # NB: 0x0 is the original size diff --git a/yt_dlp/extractor/discovery.py b/yt_dlp/extractor/discovery.py deleted file mode 100644 index b98279d..0000000 --- a/yt_dlp/extractor/discovery.py +++ /dev/null @@ -1,115 +0,0 @@ -import random -import string -import urllib.parse - -from .discoverygo import DiscoveryGoBaseIE -from ..networking.exceptions import HTTPError -from ..utils import ExtractorError - - -class DiscoveryIE(DiscoveryGoBaseIE): - _VALID_URL = r'''(?x)https?:// - (?P - go\.discovery| - www\. - (?: - investigationdiscovery| - discoverylife| - animalplanet| - ahctv| - destinationamerica| - sciencechannel| - tlc - )| - watch\. - (?: - hgtv| - foodnetwork| - travelchannel| - diynetwork| - cookingchanneltv| - motortrend - ) - )\.com/tv-shows/(?P[^/]+)/(?:video|full-episode)s/(?P[^./?#]+)''' - _TESTS = [{ - 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry', - 'info_dict': { - 'id': '5a2f35ce6b66d17a5026e29e', - 'ext': 'mp4', - 'title': 'Riding with Matthew Perry', - 'description': 'md5:a34333153e79bc4526019a5129e7f878', - 'duration': 84, - }, - 'params': { - 'skip_download': True, # requires ffmpeg - }, - }, { - 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision', - 'only_matching': True, - }, { - 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road', - 'only_matching': True, - }, { - # using `show_slug` is important to get the correct video data - 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special', - 'only_matching': True, - }] - _GEO_COUNTRIES = ['US'] - _GEO_BYPASS = False - _API_BASE_URL = 'https://api.discovery.com/v1/' - - def _real_extract(self, url): - site, show_slug, display_id = self._match_valid_url(url).groups() - - access_token = None - cookies = self._get_cookies(url) - - # prefer Affiliate Auth Token over Anonymous Auth Token - auth_storage_cookie = cookies.get('eosAf') 
or cookies.get('eosAn') - if auth_storage_cookie and auth_storage_cookie.value: - auth_storage = self._parse_json(urllib.parse.unquote( - urllib.parse.unquote(auth_storage_cookie.value)), - display_id, fatal=False) or {} - access_token = auth_storage.get('a') or auth_storage.get('access_token') - - if not access_token: - access_token = self._download_json( - f'https://{site}.com/anonymous', display_id, - 'Downloading token JSON metadata', query={ - 'authRel': 'authorization', - 'client_id': '3020a40c2356a645b4b4', - 'nonce': ''.join(random.choices(string.ascii_letters, k=32)), - 'redirectUri': 'https://www.discovery.com/', - })['access_token'] - - headers = self.geo_verification_headers() - headers['Authorization'] = 'Bearer ' + access_token - - try: - video = self._download_json( - self._API_BASE_URL + 'content/videos', - display_id, 'Downloading content JSON metadata', - headers=headers, query={ - 'embed': 'show.name', - 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags', - 'slug': display_id, - 'show_slug': show_slug, - })[0] - video_id = video['id'] - stream = self._download_json( - self._API_BASE_URL + 'streaming/video/' + video_id, - display_id, 'Downloading streaming JSON metadata', headers=headers) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403): - e_description = self._parse_json( - e.cause.response.read().decode(), display_id)['description'] - if 'resource not available for country' in e_description: - self.raise_geo_restricted(countries=self._GEO_COUNTRIES) - if 'Authorized Networks' in e_description: - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported. 
You may want to use --cookies.', expected=True) - raise ExtractorError(e_description) - raise - - return self._extract_video_info(video, stream, display_id) diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py deleted file mode 100644 index 9649485..0000000 --- a/yt_dlp/extractor/discoverygo.py +++ /dev/null @@ -1,171 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - determine_ext, - extract_attributes, - int_or_none, - parse_age_limit, - remove_end, - unescapeHTML, - url_or_none, -) - - -class DiscoveryGoBaseIE(InfoExtractor): - _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?: - discovery| - investigationdiscovery| - discoverylife| - animalplanet| - ahctv| - destinationamerica| - sciencechannel| - tlc| - velocitychannel - )go\.com/%s(?P[^/?#&]+)''' - - def _extract_video_info(self, video, stream, display_id): - title = video['name'] - - if not stream: - if video.get('authenticated') is True: - raise ExtractorError( - 'This video is only available via cable service provider subscription that' - ' is not currently supported. 
You may want to use --cookies.', expected=True) - else: - raise ExtractorError('Unable to find stream') - STREAM_URL_SUFFIX = 'streamUrl' - formats = [] - for stream_kind in ('', 'hds'): - suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX - stream_url = stream.get(f'{stream_kind}{suffix}') - if not stream_url: - continue - if stream_kind == '': - formats.extend(self._extract_m3u8_formats( - stream_url, display_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif stream_kind == 'hds': - formats.extend(self._extract_f4m_formats( - stream_url, display_id, f4m_id=stream_kind, fatal=False)) - - video_id = video.get('id') or display_id - description = video.get('description', {}).get('detailed') - duration = int_or_none(video.get('duration')) - - series = video.get('show', {}).get('name') - season_number = int_or_none(video.get('season', {}).get('number')) - episode_number = int_or_none(video.get('episodeNumber')) - - tags = video.get('tags') - age_limit = parse_age_limit(video.get('parental', {}).get('rating')) - - subtitles = {} - captions = stream.get('captions') - if isinstance(captions, list): - for caption in captions: - subtitle_url = url_or_none(caption.get('fileUrl')) - if not subtitle_url or not subtitle_url.startswith('http'): - continue - lang = caption.get('fileLang', 'en') - ext = determine_ext(subtitle_url) - subtitles.setdefault(lang, []).append({ - 'url': subtitle_url, - 'ext': 'ttml' if ext == 'xml' else ext, - }) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'duration': duration, - 'series': series, - 'season_number': season_number, - 'episode_number': episode_number, - 'tags': tags, - 'age_limit': age_limit, - 'formats': formats, - 'subtitles': subtitles, - } - - -class DiscoveryGoIE(DiscoveryGoBaseIE): - _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' - _GEO_COUNTRIES = ['US'] - _TEST = { - 'url': 
'https://www.discoverygo.com/bering-sea-gold/reaper-madness/', - 'info_dict': { - 'id': '58c167d86b66d12f2addeb01', - 'ext': 'mp4', - 'title': 'Reaper Madness', - 'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78', - 'duration': 2519, - 'series': 'Bering Sea Gold', - 'season_number': 8, - 'episode_number': 6, - 'age_limit': 14, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - container = extract_attributes( - self._search_regex( - r'(]+class=["\']video-player-container[^>]+>)', - webpage, 'video container')) - - video = self._parse_json( - container.get('data-video') or container.get('data-json'), - display_id) - - stream = video.get('stream') - - return self._extract_video_info(video, stream, display_id) - - -class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): - _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % '' - _TEST = { - 'url': 'https://www.discoverygo.com/bering-sea-gold/', - 'info_dict': { - 'id': 'bering-sea-gold', - 'title': 'Bering Sea Gold', - 'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e', - }, - 'playlist_mincount': 6, - } - - @classmethod - def suitable(cls, url): - return False if DiscoveryGoIE.suitable(url) else super().suitable(url) - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - entries = [] - for mobj in re.finditer(r'data-json=(["\'])(?P{.+?})\1', webpage): - data = self._parse_json( - mobj.group('json'), display_id, - transform_source=unescapeHTML, fatal=False) - if not isinstance(data, dict) or data.get('type') != 'episode': - continue - episode_url = data.get('socialUrl') - if not episode_url: - continue - entries.append(self.url_result( - episode_url, ie=DiscoveryGoIE.ie_key(), - video_id=data.get('id'))) - - return self.playlist_result( - entries, display_id, - remove_end(self._og_search_title( - webpage, fatal=False), ' | Discovery GO'), - 
self._og_search_description(webpage)) diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index 48eae10..e9f9357 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -346,8 +346,16 @@ class HGTVDeIE(DPlayBaseIE): class DiscoveryPlusBaseIE(DPlayBaseIE): + """Subclasses must set _PRODUCT, _DISCO_API_PARAMS""" + + _DISCO_CLIENT_VER = '27.43.0' + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6' + headers.update({ + 'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}', + 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:{self._DISCO_CLIENT_VER}', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) def _download_video_playback_info(self, disco_base, video_id, headers): return self._download_json( @@ -368,6 +376,26 @@ class DiscoveryPlusBaseIE(DPlayBaseIE): class GoDiscoveryIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://go.discovery.com/video/in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister', + 'info_dict': { + 'id': '5352642', + 'display_id': 'in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister', + 'ext': 'mp4', + 'title': 'Trapped in a Twister', + 'description': 'Twisters destroy Midwest towns, trapping spotters in the eye of the storm.', + 'episode_number': 1, + 'episode': 'Episode 1', + 'season_number': 1, + 'season': 'Season 1', + 'series': 'In The Eye Of The Storm', + 'duration': 2490.237, + 'upload_date': '20240715', + 'timestamp': 1721008800, + 'tags': [], + 'creators': ['Discovery'], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2024/07/10/5e39637d-cabf-3ab3-8e9a-f4e9d37bc036.jpeg', + }, + }, { 'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer', 'info_dict': { 'id': '4164906', @@ -395,6 +423,26 @@ class GoDiscoveryIE(DiscoveryPlusBaseIE): class 
TravelChannelIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://watch.travelchannel.com/video/the-dead-files-travel-channel/protect-the-children', + 'info_dict': { + 'id': '4710177', + 'display_id': 'the-dead-files-travel-channel/protect-the-children', + 'ext': 'mp4', + 'title': 'Protect the Children', + 'description': 'An evil presence threatens an Ohio woman\'s children and marriage.', + 'season_number': 14, + 'season': 'Season 14', + 'episode_number': 10, + 'episode': 'Episode 10', + 'series': 'The Dead Files', + 'duration': 2550.481, + 'timestamp': 1664510400, + 'upload_date': '20220930', + 'tags': [], + 'creators': ['Travel Channel'], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/17/5e45eace-de5d-343a-9293-f400a2aa77d5.jpeg', + }, + }, { 'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely', 'info_dict': { 'id': '2220256', @@ -422,6 +470,26 @@ class TravelChannelIE(DiscoveryPlusBaseIE): class CookingChannelIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://watch.cookingchanneltv.com/video/bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson', + 'info_dict': { + 'id': '5350005', + 'ext': 'mp4', + 'display_id': 'bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson', + 'title': 'Titans vs Marcus Samuelsson', + 'description': 'Marcus Samuelsson throws his legendary global tricks at the Titans.', + 'episode_number': 1, + 'episode': 'Episode 1', + 'season_number': 3, + 'season': 'Season 3', + 'series': 'Bobby\'s Triple Threat', + 'duration': 2520.851, + 'upload_date': '20240710', + 'timestamp': 1720573200, + 'tags': [], + 'creators': ['Food Network'], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2024/07/04/529cd095-27ec-35c5-84e9-90ebd3e5d2da.jpeg', + }, + }, { 'url': 
'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634', 'info_dict': { 'id': '2348634', @@ -449,6 +517,22 @@ class CookingChannelIE(DiscoveryPlusBaseIE): class HGTVUsaIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://watch.hgtv.com/video/flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip', + 'info_dict': { + 'id': '5025585', + 'display_id': 'flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip', + 'ext': 'mp4', + 'title': 'Flip or Flop: The Final Flip', + 'description': 'Tarek and Christina are going their separate ways after one last flip!', + 'series': 'Flip or Flop: The Final Flip', + 'duration': 2580.644, + 'upload_date': '20231101', + 'timestamp': 1698811200, + 'tags': [], + 'creators': ['HGTV'], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2022/11/27/455caa6c-1462-3f14-b63d-a026d7a5e6d3.jpeg', + }, + }, { 'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house', 'info_dict': { 'id': '4289736', @@ -476,6 +560,26 @@ class HGTVUsaIE(DiscoveryPlusBaseIE): class FoodNetworkIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://watch.foodnetwork.com/video/guys-grocery-games-food-network/wild-in-the-aisles', + 'info_dict': { + 'id': '2152549', + 'display_id': 'guys-grocery-games-food-network/wild-in-the-aisles', + 'ext': 'mp4', + 'title': 'Wild in the Aisles', + 'description': 'The chefs make spaghetti and meatballs with "Out of Stock" ingredients.', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': 'Guy\'s Grocery Games', + 'tags': [], + 'creators': ['Food Network'], + 'duration': 2520.651, + 'upload_date': '20230623', + 'timestamp': 1687492800, + 'thumbnail': 
'https://us1-prod-images.disco-api.com/2022/06/15/37fb5333-cad2-3dbb-af7c-c20ec77c89c6.jpeg', + }, + }, { 'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly', 'info_dict': { 'id': '4116449', @@ -503,6 +607,26 @@ class FoodNetworkIE(DiscoveryPlusBaseIE): class DestinationAmericaIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://www.destinationamerica.com/video/bbq-pit-wars-destination-america/smoke-on-the-water', + 'info_dict': { + 'id': '2218409', + 'display_id': 'bbq-pit-wars-destination-america/smoke-on-the-water', + 'ext': 'mp4', + 'title': 'Smoke on the Water', + 'description': 'The pitmasters head to Georgia for the Smoke on the Water BBQ Festival.', + 'season_number': 2, + 'season': 'Season 2', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': 'BBQ Pit Wars', + 'tags': [], + 'creators': ['Destination America'], + 'duration': 2614.878, + 'upload_date': '20230623', + 'timestamp': 1687492800, + 'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/c0f8e85d-9a10-3e6f-8e43-f6faafa81ba2.jpeg', + }, + }, { 'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot', 'info_dict': { 'id': '4210904', @@ -530,6 +654,26 @@ class DestinationAmericaIE(DiscoveryPlusBaseIE): class InvestigationDiscoveryIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://www.investigationdiscovery.com/video/deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca', + 'info_dict': { + 'id': '5341132', + 'display_id': 'deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca', + 'ext': 'mp4', + 'title': 'RIP Bianca', + 'description': 'A teenage influencer discovers an online world of threat, harm and danger.', + 
'season_number': 1, + 'season': 'Season 1', + 'episode_number': 3, + 'episode': 'Episode 3', + 'series': 'Deadly Influence: The Social Media Murders', + 'creators': ['Investigation Discovery'], + 'tags': [], + 'duration': 2490.888, + 'upload_date': '20240618', + 'timestamp': 1718672400, + 'thumbnail': 'https://us1-prod-images.disco-api.com/2024/06/15/b567c774-9e44-3c6c-b0ba-db860a73e812.jpeg', + }, + }, { 'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown', 'info_dict': { 'id': '2139409', @@ -557,6 +701,26 @@ class InvestigationDiscoveryIE(DiscoveryPlusBaseIE): class AmHistoryChannelIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://www.ahctv.com/video/blood-and-fury-americas-civil-war-ahc/battle-of-bull-run', + 'info_dict': { + 'id': '2139199', + 'display_id': 'blood-and-fury-americas-civil-war-ahc/battle-of-bull-run', + 'ext': 'mp4', + 'title': 'Battle of Bull Run', + 'description': 'Two untested armies clash in the first real battle of the Civil War.', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': 'Blood and Fury: America\'s Civil War', + 'duration': 2612.509, + 'upload_date': '20220923', + 'timestamp': 1663905600, + 'creators': ['AHC'], + 'tags': [], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/4af61bd7-d705-3108-82c4-1a6e541e20fa.jpeg', + }, + }, { 'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army', 'info_dict': { 'id': '2309730', @@ -584,6 +748,26 @@ class AmHistoryChannelIE(DiscoveryPlusBaseIE): class ScienceChannelIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://www.sciencechannel.com/video/spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets', + 'info_dict': { + 'id': '2347335', + 'display_id': 
'spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets', + 'ext': 'mp4', + 'title': 'Mystery of the Dead Planets', + 'description': 'Astronomers unmask the truly destructive nature of the cosmos.', + 'season_number': 7, + 'season': 'Season 7', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': 'Space\'s Deepest Secrets', + 'duration': 2524.989, + 'upload_date': '20230128', + 'timestamp': 1674882000, + 'creators': ['Science'], + 'tags': [], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/30/3796829d-aead-3f9a-bd8d-e49048b3cdca.jpeg', + }, + }, { 'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine', 'info_dict': { 'id': '2842849', @@ -608,36 +792,29 @@ class ScienceChannelIE(DiscoveryPlusBaseIE): } -class DIYNetworkIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX +class DiscoveryLifeIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ - 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas', + 'url': 'https://www.discoverylife.com/video/er-files-discovery-life-atve-us/sweet-charity', 'info_dict': { - 'id': '2309730', - 'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas', + 'id': '2347614', + 'display_id': 'er-files-discovery-life-atve-us/sweet-charity', 'ext': 'mp4', - 'title': 'Bringing Beach Life to Texas', - 'description': 'The Pool Kings give a family a day at the beach in their own backyard.', - 'season_number': 10, - 'episode_number': 2, + 'title': 'Sweet Charity', + 'description': 'The staff at Charity Hospital treat a serious foot infection.', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': 'ER Files', + 'duration': 2364.261, + 'upload_date': '20230721', + 'timestamp': 1689912000, + 'creators': ['Discovery Life'], + 'tags': [], + 
'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/16/4b6f0124-360b-3546-b6a4-5552db886b86.jpeg', }, - 'skip': 'Available for Premium users', }, { - 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas', - 'only_matching': True, - }] - - _PRODUCT = 'diy' - _DISCO_API_PARAMS = { - 'disco_host': 'us1-prod-direct.watch.diynetwork.com', - 'realm': 'go', - 'country': 'us', - } - - -class DiscoveryLifeIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX - _TESTS = [{ 'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma', 'info_dict': { 'id': '2218238', @@ -665,6 +842,26 @@ class DiscoveryLifeIE(DiscoveryPlusBaseIE): class AnimalPlanetIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://www.animalplanet.com/video/mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru', + 'info_dict': { + 'id': '4650835', + 'display_id': 'mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru', + 'ext': 'mp4', + 'title': 'The Demon of Peru', + 'description': 'In Peru, a farming village is being terrorized by a “man-like beast.”', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 4, + 'episode': 'Episode 4', + 'series': 'Mysterious Creatures with Forrest Galante', + 'duration': 2490.488, + 'upload_date': '20230111', + 'timestamp': 1673413200, + 'creators': ['Animal Planet'], + 'tags': [], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/01/6dbaa833-9a2e-3fee-9381-c19eddf67c0c.jpeg', + }, + }, { 'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown', 'info_dict': { 'id': '3338923', @@ -692,6 +889,26 @@ class AnimalPlanetIE(DiscoveryPlusBaseIE): class TLCIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:go\.)?tlc\.com/video' + 
DPlayBaseIE._PATH_REGEX _TESTS = [{ + 'url': 'https://go.tlc.com/video/90-day-the-last-resort-tlc-atve-us/the-last-chance', + 'info_dict': { + 'id': '5186422', + 'display_id': '90-day-the-last-resort-tlc-atve-us/the-last-chance', + 'ext': 'mp4', + 'title': 'The Last Chance', + 'description': 'Infidelity shakes Kalani and Asuelu\'s world, and Angela threatens divorce.', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Episode 1', + 'series': '90 Day: The Last Resort', + 'duration': 5123.91, + 'upload_date': '20230815', + 'timestamp': 1692061200, + 'creators': ['TLC'], + 'tags': [], + 'thumbnail': 'https://us1-prod-images.disco-api.com/2023/08/08/0ee367e2-ac76-334d-bf23-dbf796696a24.jpeg', + }, + }, { 'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1', 'info_dict': { 'id': '2206540', @@ -716,93 +933,8 @@ class TLCIE(DiscoveryPlusBaseIE): } -class MotorTrendIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX - _TESTS = [{ - 'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas', - 'info_dict': { - 'id': '"4859182"', - 'display_id': 'double-dakotas', - 'ext': 'mp4', - 'title': 'Double Dakotas', - 'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.', - 'season_number': 2, - 'episode_number': 3, - }, - 'skip': 'Available for Premium users', - }, { - 'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas', - 'only_matching': True, - }] - - _PRODUCT = 'vel' - _DISCO_API_PARAMS = { - 'disco_host': 'us1-prod-direct.watch.motortrend.com', - 'realm': 'go', - 'country': 'us', - } - - -class MotorTrendOnDemandIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX - _TESTS = [{ - 'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784', 
- 'info_dict': { - 'id': '37699', - 'display_id': 'wheelstanding-dump-truck-stubby-bobs-comeback/37699', - 'ext': 'mp4', - 'title': 'Wheelstanding Dump Truck! Stubby Bob’s Comeback', - 'description': 'md5:996915abe52a1c3dfc83aecea3cce8e7', - 'season_number': 5, - 'episode_number': 52, - 'episode': 'Episode 52', - 'season': 'Season 5', - 'thumbnail': r're:^https?://.+\.jpe?g$', - 'timestamp': 1388534401, - 'duration': 1887.345, - 'creator': 'Originals', - 'series': 'Roadkill', - 'upload_date': '20140101', - 'tags': [], - }, - }, { - 'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/', - 'info_dict': { - 'id': '4922860', - 'ext': 'mp4', - 'title': 'Roadworthy Rescues | Teaser Trailer', - 'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.', - 'display_id': 'roadworthy-rescues-teaser-trailer/4922860', - 'creator': 'Originals', - 'series': 'Roadworthy Rescues', - 'thumbnail': r're:^https?://.+\.jpe?g$', - 'upload_date': '20220907', - 'timestamp': 1662523200, - 'duration': 1066.356, - 'tags': [], - }, - }, { - 'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439', - 'only_matching': True, - }] - - _PRODUCT = 'MTOD' - _DISCO_API_PARAMS = { - 'disco_host': 'us1-prod-direct.motortrendondemand.com', - 'realm': 'motortrend', - 'country': 'us', - } - - def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers.update({ - 'x-disco-params': f'realm={realm}', - 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:4.39.1-gi1', - 'Authorization': self._get_auth(disco_base, display_id, realm), - }) - - class DiscoveryPlusIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport)?'
+ DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family', 'info_dict': { @@ -823,14 +955,42 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE): }, { 'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers', 'only_matching': True, + }, { + 'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review', + 'only_matching': True, }] - _PRODUCT = 'dplus_us' - _DISCO_API_PARAMS = { - 'disco_host': 'us1-prod-direct.discoveryplus.com', - 'realm': 'go', - 'country': 'us', - } + _PRODUCT = None + _DISCO_API_PARAMS = None + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}', + 'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) + + def _real_extract(self, url): + video_id, country = self._match_valid_url(url).group('id', 'country') + if not country: + country = 'us' + + self._PRODUCT = f'dplus_{country}' + + if country in ('br', 'ca', 'us'): + self._DISCO_API_PARAMS = { + 'disco_host': 'us1-prod-direct.discoveryplus.com', + 'realm': 'go', + 'country': country, + } + else: + self._DISCO_API_PARAMS = { + 'disco_host': 'eu1-prod-direct.discoveryplus.com', + 'realm': 'dplay', + 'country': country, + } + + return self._get_disco_api_info(url, video_id, **self._DISCO_API_PARAMS) class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE): @@ -993,7 +1153,7 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE): 'only_matching': True, }] - _PRODUCT = 'dplus_us' + _PRODUCT = 'dplus_it' _DISCO_API_PARAMS = { 'disco_host': 'eu1-prod-direct.discoveryplus.com', 'realm': 'dplay', @@ -1002,8 +1162,8 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE): def _update_disco_api_headers(self, headers, disco_base, display_id, realm): headers.update({ - 
'x-disco-params': f'realm={realm}', - 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6', + 'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}', + 'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}', 'Authorization': self._get_auth(disco_base, display_id, realm), }) @@ -1044,39 +1204,3 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE): _SHOW_STR = 'show' _INDEX = 4 _VIDEO_IE = DiscoveryPlusIndiaIE - - -class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://plus.globalcyclingnetwork.com/watch/1397691', - 'info_dict': { - 'id': '1397691', - 'ext': 'mp4', - 'title': 'The Athertons: Mountain Biking\'s Fastest Family', - 'description': 'md5:75a81937fcd8b989eec6083a709cd837', - 'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png', - 'series': 'gcn', - 'creator': 'Gcn', - 'upload_date': '20210309', - 'timestamp': 1615248000, - 'duration': 2531.0, - 'tags': [], - }, - 'skip': 'Subscription required', - 'params': {'skip_download': 'm3u8'}, - }] - - _PRODUCT = 'web' - _DISCO_API_PARAMS = { - 'disco_host': 'disco-api-prod.globalcyclingnetwork.com', - 'realm': 'gcn', - 'country': 'us', - } - - def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers.update({ - 'x-disco-params': f'realm={realm}', - 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2', - 'Authorization': self._get_auth(disco_base, display_id, realm), - }) diff --git a/yt_dlp/extractor/epidemicsound.py b/yt_dlp/extractor/epidemicsound.py index 0d81b11..75b0f05 100644 --- a/yt_dlp/extractor/epidemicsound.py +++ b/yt_dlp/extractor/epidemicsound.py @@ -2,6 +2,7 @@ from .common import InfoExtractor from ..utils import ( float_or_none, int_or_none, + join_nonempty, orderedSet, parse_iso8601, parse_qs, @@ -13,7 +14,7 @@ from ..utils import ( class EpidemicSoundIE(InfoExtractor): - _VALID_URL
= r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/(?:(?P<sfx>sound-effects/tracks)|track)/(?P<id>[0-9a-zA-Z-]+)' _TESTS = [{ 'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/', 'md5': 'd98ff2ddb49e8acab9716541cbc9dfac', @@ -47,6 +48,20 @@ class EpidemicSoundIE(InfoExtractor): 'release_timestamp': 1700535606, 'release_date': '20231121', }, + }, { + 'url': 'https://www.epidemicsound.com/sound-effects/tracks/2f02f54b-9faa-4daf-abac-1cfe9e9cef69/', + 'md5': '35d7cf05bd8b614a84f0495a05de9388', + 'info_dict': { + 'id': '208931', + 'ext': 'mp3', + 'upload_date': '20240603', + 'timestamp': 1717436529, + 'categories': ['appliance'], + 'display_id': '6b2NXLURPr', + 'duration': 1.0, + 'title': 'Oven, Grill, Door Open 01', + 'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg', + }, }] @staticmethod @@ -77,8 +92,10 @@ class EpidemicSoundIE(InfoExtractor): return f def _real_extract(self, url): - video_id = self._match_id(url) - json_data = self._download_json(f'https://www.epidemicsound.com/json/track/{video_id}', video_id) + video_id, is_sfx = self._match_valid_url(url).group('id', 'sfx') + json_data = self._download_json(join_nonempty( + 'https://www.epidemicsound.com/json/track', + is_sfx and 'kosmos-id', video_id, delim='/'), video_id) thumbnails = traverse_obj(json_data, [('imageUrl', 'cover')]) thumb_base_url = traverse_obj(json_data, ('coverArt', 'baseUrl', {url_or_none})) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 3b8e1e9..04cffaa 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -43,6 +43,7 @@ from ..utils import ( xpath_text, xpath_with_ns, ) +from ..utils._utils import _UnsafeExtensionError class GenericIE(InfoExtractor): @@ -2446,9 +2447,13 @@ class GenericIE(InfoExtractor): if not is_html(first_bytes): self.report_warning( 'URL could be a direct video link, returning it
as such.') + ext = determine_ext(url) + if ext not in _UnsafeExtensionError.ALLOWED_EXTENSIONS: + ext = 'unknown_video' info_dict.update({ 'direct': True, 'url': url, + 'ext': ext, }) return info_dict diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py index 726fe41..72e89c3 100644 --- a/yt_dlp/extractor/picarto.py +++ b/yt_dlp/extractor/picarto.py @@ -5,6 +5,7 @@ from ..utils import ( ExtractorError, str_or_none, traverse_obj, + update_url, ) @@ -43,15 +44,16 @@ class PicartoIE(InfoExtractor): url } }''' % (channel_id, channel_id), # noqa: UP031 - })['data'] + }, headers={'Accept': '*/*', 'Content-Type': 'application/json'})['data'] metadata = data['channel'] if metadata.get('online') == 0: raise ExtractorError('Stream is offline', expected=True) title = metadata['title'] - cdn_data = self._download_json( - data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js', + cdn_data = self._download_json(''.join(( + update_url(data['getLoadBalancerUrl']['url'], scheme='https'), + '/stream/json_', metadata['stream_name'], '.js')), channel_id, 'Downloading load balancing info') formats = [] @@ -99,10 +101,10 @@ class PicartoVodIE(InfoExtractor): }, 'skip': 'The VOD does not exist', }, { - 'url': 'https://picarto.tv/ArtofZod/videos/772650', - 'md5': '00067a0889f1f6869cc512e3e79c521b', + 'url': 'https://picarto.tv/ArtofZod/videos/771008', + 'md5': 'abef5322f2700d967720c4c6754b2a34', 'info_dict': { - 'id': '772650', + 'id': '771008', 'ext': 'mp4', 'title': 'Art of Zod - Drawing and Painting', 'thumbnail': r're:^https?://.*\.jpg', @@ -131,7 +133,7 @@ class PicartoVodIE(InfoExtractor): }} }} }}''', - })['data']['video'] + }, headers={'Accept': '*/*', 'Content-Type': 'application/json'})['data']['video'] file_name = data['file_name'] netloc = urllib.parse.urlparse(data['video_recording_image_url']).netloc diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index afb512d..4f8d964 100644 --- 
a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -871,7 +871,7 @@ class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE): 'id': '30909869', 'title': 'neilcic', }, - 'playlist_mincount': 23, + 'playlist_mincount': 22, }] def _real_extract(self, url): @@ -880,7 +880,7 @@ class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE): self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS) return self._extract_playlist( - f'{self._API_V2_BASE}stream/users/{user["id"]}', str(user['id']), user.get('username')) + f'{self._API_V2_BASE}users/{user["id"]}/tracks', str(user['id']), user.get('username')) class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index c3505b1..aa1dcec 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1458,9 +1458,11 @@ class TikTokLiveIE(TikTokBaseIE): if webpage: data = self._get_sigi_state(webpage, uploader or room_id) - room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False) - or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None) - or room_id) + room_id = ( + traverse_obj(data, (( + ('LiveRoom', 'liveRoomUserInfo', 'user'), + ('UserModule', 'users', ...)), 'roomId', {str}, any)) + or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=room_id)) uploader = uploader or traverse_obj( data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'), ('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str) diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py index 52ff230..953eb77 100644 --- a/yt_dlp/extractor/tv5mondeplus.py +++ b/yt_dlp/extractor/tv5mondeplus.py @@ -96,7 +96,7 @@ class TV5MondePlusIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + webpage 
= self._download_webpage(url, display_id, impersonate=True) if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage: self.raise_geo_restricted(countries=['FR']) @@ -122,8 +122,9 @@ class TV5MondePlusIE(InfoExtractor): if not token: continue deferred_json = self._download_json( - f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', display_id, - note='Downloading deferred info', headers={'Authorization': f'Bearer {token}'}, fatal=False) + f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', + display_id, 'Downloading deferred info', fatal=False, impersonate=True, + headers={'Authorization': f'Bearer {token}'}) v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none})) if not v_url: continue diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1c0a70d..53aca38 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -270,7 +270,7 @@ def build_innertube_clients(): THIRD_PARTY = { 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL } - BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb') + BASE_CLIENTS = ('ios', 'web', 'tv', 'mweb', 'android') priority = qualities(BASE_CLIENTS[::-1]) for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): @@ -1294,6 +1294,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'}, } _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') + _POTOKEN_EXPERIMENTS = ('51217476', '51217102') _GEO_BYPASS = False @@ -3142,7 +3143,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_code(self, video_id, player_url): player_id = self._extract_player_info(player_url) - func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1') + func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09') jscode = func_code or self._load_player(video_id, player_url) jsi 
= JSInterpreter(jscode) @@ -3701,8 +3702,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return pr_id def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data): - initial_pr = None + initial_pr = ignore_initial_response = None if webpage: + if 'web' in clients: + experiments = traverse_obj(master_ytcfg, ( + 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {lambda x: x.split(',')}, ...)) + if all(x in experiments for x in self._POTOKEN_EXPERIMENTS): + self.report_warning( + 'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response') + ignore_initial_response = True initial_pr = self._search_json( self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False) @@ -3732,8 +3740,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): skipped_clients = {} while clients: client, base_client, variant = _split_innertube_client(clients.pop()) - player_ytcfg = master_ytcfg if client == 'web' else {} - if 'configs' not in self._configuration_arg('player_skip') and client != 'web': + player_ytcfg = {} + if client == 'web': + player_ytcfg = self._get_default_ytcfg() if ignore_initial_response else master_ytcfg + elif 'configs' not in self._configuration_arg('player_skip'): player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage) @@ -3746,11 +3756,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_url = self._download_player_url(video_id) tried_iframe_fallback = True - try: - pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( - client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data) - except ExtractorError as e: - self.report_warning(e) + pr = initial_pr if client == 'web' and not ignore_initial_response else None + for retry in 
self.RetryManager(fatal=False): + try: + pr = pr or self._extract_player_response( + client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, + player_url if require_js_player else None, initial_pr, smuggled_data) + except ExtractorError as e: + self.report_warning(e) + break + experiments = traverse_obj(pr, ( + 'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK', + 'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ...)) + if all(x in experiments for x in self._POTOKEN_EXPERIMENTS): + pr = None + retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True) + if not pr: continue if pr_id := self._invalid_player_response(pr, video_id): -- cgit v1.2.3