summary refs log tree commit diff stats
path: root/yt_dlp/extractor
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-05 09:07:17 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-05 09:07:17 +0000
commit 423ad025db8992beb1cfde9800c4bbc37426fc82 (patch)
tree c6f676600b2362994be97673158ceb77996e6f71 /yt_dlp/extractor
parent Adding debian version 2024.07.09-1. (diff)
download yt-dlp-423ad025db8992beb1cfde9800c4bbc37426fc82.tar.xz
yt-dlp-423ad025db8992beb1cfde9800c4bbc37426fc82.zip
Merging upstream version 2024.07.16.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r-- yt_dlp/extractor/_extractors.py 5
-rw-r--r-- yt_dlp/extractor/adn.py 23
-rw-r--r-- yt_dlp/extractor/afreecatv.py 22
-rw-r--r-- yt_dlp/extractor/box.py 20
-rw-r--r-- yt_dlp/extractor/digitalconcerthall.py 42
-rw-r--r-- yt_dlp/extractor/discovery.py 115
-rw-r--r-- yt_dlp/extractor/discoverygo.py 171
-rw-r--r-- yt_dlp/extractor/dplay.py 438
-rw-r--r-- yt_dlp/extractor/epidemicsound.py 23
-rw-r--r-- yt_dlp/extractor/generic.py 5
-rw-r--r-- yt_dlp/extractor/picarto.py 16
-rw-r--r-- yt_dlp/extractor/soundcloud.py 4
-rw-r--r-- yt_dlp/extractor/tiktok.py 8
-rw-r--r-- yt_dlp/extractor/tv5mondeplus.py 7
-rw-r--r-- yt_dlp/extractor/youtube.py 41
15 files changed, 430 insertions, 510 deletions
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index fc917ff..d2140bc 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -504,7 +504,6 @@ from .dhm import DHMIE
from .digitalconcerthall import DigitalConcertHallIE
from .digiteka import DigitekaIE
from .discogs import DiscogsReleasePlaylistIE
-from .discovery import DiscoveryIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .dlf import (
@@ -532,16 +531,12 @@ from .dplay import (
DiscoveryPlusIndiaShowIE,
DiscoveryPlusItalyIE,
DiscoveryPlusItalyShowIE,
- DIYNetworkIE,
DPlayIE,
FoodNetworkIE,
- GlobalCyclingNetworkPlusIE,
GoDiscoveryIE,
HGTVDeIE,
HGTVUsaIE,
InvestigationDiscoveryIE,
- MotorTrendIE,
- MotorTrendOnDemandIE,
ScienceChannelIE,
TravelChannelIE,
)
diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py
index 7be990b..3370717 100644
--- a/yt_dlp/extractor/adn.py
+++ b/yt_dlp/extractor/adn.py
@@ -16,6 +16,7 @@ from ..utils import (
float_or_none,
int_or_none,
intlist_to_bytes,
+ join_nonempty,
long_to_bytes,
parse_iso8601,
pkcs1pad,
@@ -48,9 +49,9 @@ class ADNBaseIE(InfoExtractor):
class ADNIE(ADNBaseIE):
- _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/[^/?#]+/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/[^/?#]+/(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
+ 'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir',
'md5': '1c9ef066ceb302c86f80c2b371615261',
'info_dict': {
'id': '9841',
@@ -70,10 +71,10 @@ class ADNIE(ADNBaseIE):
},
'skip': 'Only available in French and German speaking Europe',
}, {
- 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
+ 'url': 'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
'only_matching': True,
}, {
- 'url': 'https://animationdigitalnetwork.de/video/the-eminence-in-shadow/23550-folge-1',
+ 'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1',
'md5': '5c5651bf5791fa6fcd7906012b9d94e8',
'info_dict': {
'id': '23550',
@@ -217,7 +218,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
links_data = self._download_json(
links_url, video_id, 'Downloading links JSON metadata', headers={
'X-Player-Token': authorization,
- 'X-Target-Distribution': lang,
+ 'X-Target-Distribution': lang or 'fr',
**self._HEADERS,
}, query={
'freeWithAds': 'true',
@@ -298,9 +299,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
class ADNSeasonIE(ADNBaseIE):
- _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.(?P<lang>fr|de)/video/(?P<id>[^/?#]+)/?(?:$|[#?])'
+ _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?P<lang>de)/)?video/(?P<id>[^/?#]+)/?(?:$|[#?])'
_TESTS = [{
- 'url': 'https://animationdigitalnetwork.fr/video/tokyo-mew-mew-new',
+ 'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new',
'playlist_count': 12,
'info_dict': {
'id': '911',
@@ -318,7 +319,7 @@ class ADNSeasonIE(ADNBaseIE):
episodes = self._download_json(
f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug,
'Downloading episode list', headers={
- 'X-Target-Distribution': lang,
+ 'X-Target-Distribution': lang or 'fr',
**self._HEADERS,
}, query={
'order': 'asc',
@@ -327,8 +328,8 @@ class ADNSeasonIE(ADNBaseIE):
def entries():
for episode_id in traverse_obj(episodes, ('videos', ..., 'id', {str_or_none})):
- yield self.url_result(
- f'https://animationdigitalnetwork.{lang}/video/{video_show_slug}/{episode_id}',
- ADNIE, episode_id)
+ yield self.url_result(join_nonempty(
+ 'https://animationdigitalnetwork.com', lang, 'video',
+ video_show_slug, episode_id, delim='/'), ADNIE, episode_id)
return self.playlist_result(entries(), show_id, show.get('title'))
diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py
index f51b5a6..815d205 100644
--- a/yt_dlp/extractor/afreecatv.py
+++ b/yt_dlp/extractor/afreecatv.py
@@ -1,6 +1,7 @@
import functools
from .common import InfoExtractor
+from ..networking import Request
from ..utils import (
ExtractorError,
OnDemandPagedList,
@@ -58,6 +59,13 @@ class AfreecaTVBaseIE(InfoExtractor):
f'Unable to login: {self.IE_NAME} said: {error}',
expected=True)
+ def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
+ return self._download_json(Request(
+ f'https://api.m.afreecatv.com/{endpoint}',
+ data=data, headers=headers, query=query,
+ extensions={'legacy_ssl': True}), display_id,
+ 'Downloading API JSON', 'Unable to download API JSON')
+
class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv'
@@ -184,12 +192,12 @@ class AfreecaTVIE(AfreecaTVBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- data = self._download_json(
- 'https://api.m.afreecatv.com/station/video/a/view', video_id,
- headers={'Referer': url}, data=urlencode_postdata({
+ data = self._call_api(
+ 'station/video/a/view', video_id, headers={'Referer': url},
+ data=urlencode_postdata({
'nTitleNo': video_id,
'nApiLevel': 10,
- }), impersonate=True)['data']
+ }))['data']
error_code = traverse_obj(data, ('code', {int}))
if error_code == -6221:
@@ -267,9 +275,9 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- data = self._download_json(
- 'https://api.m.afreecatv.com/catchstory/a/view', video_id, headers={'Referer': url},
- query={'aStoryListIdx': '', 'nStoryIdx': video_id}, impersonate=True)
+ data = self._call_api(
+ 'catchstory/a/view', video_id, headers={'Referer': url},
+ query={'aStoryListIdx': '', 'nStoryIdx': video_id})
return self.playlist_result(self._entries(data), video_id)
diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py
index 3547ad9..f06339f 100644
--- a/yt_dlp/extractor/box.py
+++ b/yt_dlp/extractor/box.py
@@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj
class BoxIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
+ _VALID_URL = r'https?://(?:[^.]+\.)?(?P<service>app|ent)\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
_TESTS = [{
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
@@ -38,10 +38,22 @@ class BoxIE(InfoExtractor):
'uploader_id': '239068974',
},
'params': {'skip_download': 'dash fragment too small'},
+ }, {
+ 'url': 'https://thejacksonlaboratory.ent.box.com/s/2x09dm6vcg6y28o0oox1so4l0t8wzt6l/file/1536173056065',
+ 'info_dict': {
+ 'id': '1536173056065',
+ 'ext': 'mp4',
+ 'uploader_id': '18523128264',
+ 'uploader': 'Lexi Hennigan',
+ 'title': 'iPSC Symposium recording part 1.mp4',
+ 'timestamp': 1716228343,
+ 'upload_date': '20240520',
+ },
+ 'params': {'skip_download': 'dash fragment too small'},
}]
def _real_extract(self, url):
- shared_name, file_id = self._match_valid_url(url).groups()
+ shared_name, file_id, service = self._match_valid_url(url).group('shared_name', 'id', 'service')
webpage = self._download_webpage(url, file_id or shared_name)
if not file_id:
@@ -57,14 +69,14 @@ class BoxIE(InfoExtractor):
request_token = self._search_json(
r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
access_token = self._download_json(
- 'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
+ f'https://{service}.box.com/app-api/enduserapp/elements/tokens', file_id,
'Downloading token JSON metadata',
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
'Content-Type': 'application/json',
'X-Request-Token': request_token,
'X-Box-EndUser-API': 'sharedName=' + shared_name,
})[file_id]['read']
- shared_link = 'https://app.box.com/s/' + shared_name
+ shared_link = f'https://{service}.box.com/s/{shared_name}'
f = self._download_json(
'https://api.box.com/2.0/files/' + file_id, file_id,
'Downloading file JSON metadata', headers={
diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py
index 8b4d5c0..edb6fa9 100644
--- a/yt_dlp/extractor/digitalconcerthall.py
+++ b/yt_dlp/extractor/digitalconcerthall.py
@@ -1,6 +1,8 @@
from .common import InfoExtractor
+from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
+ parse_codecs,
try_get,
url_or_none,
urlencode_postdata,
@@ -12,6 +14,7 @@ class DigitalConcertHallIE(InfoExtractor):
IE_DESC = 'DigitalConcertHall extractor'
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
+ _USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
_ACCESS_TOKEN = None
_NETRC_MACHINE = 'digitalconcerthall'
_TESTS = [{
@@ -68,33 +71,42 @@ class DigitalConcertHallIE(InfoExtractor):
}]
def _perform_login(self, username, password):
- token_response = self._download_json(
+ login_token = self._download_json(
self._OAUTH_URL,
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
'affiliate': 'none',
'grant_type': 'device',
'device_vendor': 'unknown',
+ # device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio
+ 'device_model': 'unknown' if self._configuration_arg('prefer_combined_hls') else 'Safari',
'app_id': 'dch.webapp',
- 'app_version': '1.0.0',
+ 'app_distributor': 'berlinphil',
+ 'app_version': '1.84.0',
'client_secret': '2ySLN+2Fwb',
}), headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- })
- self._ACCESS_TOKEN = token_response['access_token']
+ 'Accept': 'application/json',
+ 'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
+ 'User-Agent': self._USER_AGENT,
+ })['access_token']
try:
- self._download_json(
+ login_response = self._download_json(
self._OAUTH_URL,
None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({
'grant_type': 'password',
'username': username,
'password': password,
}), headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Accept': 'application/json',
+ 'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
'Referer': 'https://www.digitalconcerthall.com',
- 'Authorization': f'Bearer {self._ACCESS_TOKEN}',
+ 'Authorization': f'Bearer {login_token}',
+ 'User-Agent': self._USER_AGENT,
})
- except ExtractorError:
- self.raise_login_required(msg='Login info incorrect')
+ except ExtractorError as error:
+ if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+ raise ExtractorError('Invalid username or password', expected=True)
+ raise
+ self._ACCESS_TOKEN = login_response['access_token']
def _real_initialize(self):
if not self._ACCESS_TOKEN:
@@ -108,11 +120,15 @@ class DigitalConcertHallIE(InfoExtractor):
'Accept': 'application/json',
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
'Accept-Language': language,
+ 'User-Agent': self._USER_AGENT,
})
formats = []
for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
- formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False))
+ formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ for fmt in formats:
+ if fmt.get('format_note') and fmt.get('vcodec') == 'none':
+ fmt.update(parse_codecs(fmt['format_note']))
yield {
'id': video_id,
@@ -140,13 +156,15 @@ class DigitalConcertHallIE(InfoExtractor):
f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={
'Accept': 'application/json',
'Accept-Language': language,
+ 'User-Agent': self._USER_AGENT,
+ 'Authorization': f'Bearer {self._ACCESS_TOKEN}',
})
- album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name'))
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
if type_ == 'work':
videos = [videos[int(part) - 1]]
+ album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name', {str}))
thumbnail = traverse_obj(vid_info, (
'image', ..., {self._proto_relative_url}, {url_or_none},
{lambda x: x.format(width=0, height=0)}, any)) # NB: 0x0 is the original size
diff --git a/yt_dlp/extractor/discovery.py b/yt_dlp/extractor/discovery.py
deleted file mode 100644
index b98279d..0000000
--- a/yt_dlp/extractor/discovery.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import random
-import string
-import urllib.parse
-
-from .discoverygo import DiscoveryGoBaseIE
-from ..networking.exceptions import HTTPError
-from ..utils import ExtractorError
-
-
-class DiscoveryIE(DiscoveryGoBaseIE):
- _VALID_URL = r'''(?x)https?://
- (?P<site>
- go\.discovery|
- www\.
- (?:
- investigationdiscovery|
- discoverylife|
- animalplanet|
- ahctv|
- destinationamerica|
- sciencechannel|
- tlc
- )|
- watch\.
- (?:
- hgtv|
- foodnetwork|
- travelchannel|
- diynetwork|
- cookingchanneltv|
- motortrend
- )
- )\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
- _TESTS = [{
- 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
- 'info_dict': {
- 'id': '5a2f35ce6b66d17a5026e29e',
- 'ext': 'mp4',
- 'title': 'Riding with Matthew Perry',
- 'description': 'md5:a34333153e79bc4526019a5129e7f878',
- 'duration': 84,
- },
- 'params': {
- 'skip_download': True, # requires ffmpeg
- },
- }, {
- 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
- 'only_matching': True,
- }, {
- 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
- 'only_matching': True,
- }, {
- # using `show_slug` is important to get the correct video data
- 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
- 'only_matching': True,
- }]
- _GEO_COUNTRIES = ['US']
- _GEO_BYPASS = False
- _API_BASE_URL = 'https://api.discovery.com/v1/'
-
- def _real_extract(self, url):
- site, show_slug, display_id = self._match_valid_url(url).groups()
-
- access_token = None
- cookies = self._get_cookies(url)
-
- # prefer Affiliate Auth Token over Anonymous Auth Token
- auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
- if auth_storage_cookie and auth_storage_cookie.value:
- auth_storage = self._parse_json(urllib.parse.unquote(
- urllib.parse.unquote(auth_storage_cookie.value)),
- display_id, fatal=False) or {}
- access_token = auth_storage.get('a') or auth_storage.get('access_token')
-
- if not access_token:
- access_token = self._download_json(
- f'https://{site}.com/anonymous', display_id,
- 'Downloading token JSON metadata', query={
- 'authRel': 'authorization',
- 'client_id': '3020a40c2356a645b4b4',
- 'nonce': ''.join(random.choices(string.ascii_letters, k=32)),
- 'redirectUri': 'https://www.discovery.com/',
- })['access_token']
-
- headers = self.geo_verification_headers()
- headers['Authorization'] = 'Bearer ' + access_token
-
- try:
- video = self._download_json(
- self._API_BASE_URL + 'content/videos',
- display_id, 'Downloading content JSON metadata',
- headers=headers, query={
- 'embed': 'show.name',
- 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
- 'slug': display_id,
- 'show_slug': show_slug,
- })[0]
- video_id = video['id']
- stream = self._download_json(
- self._API_BASE_URL + 'streaming/video/' + video_id,
- display_id, 'Downloading streaming JSON metadata', headers=headers)
- except ExtractorError as e:
- if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
- e_description = self._parse_json(
- e.cause.response.read().decode(), display_id)['description']
- if 'resource not available for country' in e_description:
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
- if 'Authorized Networks' in e_description:
- raise ExtractorError(
- 'This video is only available via cable service provider subscription that'
- ' is not currently supported. You may want to use --cookies.', expected=True)
- raise ExtractorError(e_description)
- raise
-
- return self._extract_video_info(video, stream, display_id)
diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py
deleted file mode 100644
index 9649485..0000000
--- a/yt_dlp/extractor/discoverygo.py
+++ /dev/null
@@ -1,171 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- determine_ext,
- extract_attributes,
- int_or_none,
- parse_age_limit,
- remove_end,
- unescapeHTML,
- url_or_none,
-)
-
-
-class DiscoveryGoBaseIE(InfoExtractor):
- _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
- discovery|
- investigationdiscovery|
- discoverylife|
- animalplanet|
- ahctv|
- destinationamerica|
- sciencechannel|
- tlc|
- velocitychannel
- )go\.com/%s(?P<id>[^/?#&]+)'''
-
- def _extract_video_info(self, video, stream, display_id):
- title = video['name']
-
- if not stream:
- if video.get('authenticated') is True:
- raise ExtractorError(
- 'This video is only available via cable service provider subscription that'
- ' is not currently supported. You may want to use --cookies.', expected=True)
- else:
- raise ExtractorError('Unable to find stream')
- STREAM_URL_SUFFIX = 'streamUrl'
- formats = []
- for stream_kind in ('', 'hds'):
- suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX
- stream_url = stream.get(f'{stream_kind}{suffix}')
- if not stream_url:
- continue
- if stream_kind == '':
- formats.extend(self._extract_m3u8_formats(
- stream_url, display_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- elif stream_kind == 'hds':
- formats.extend(self._extract_f4m_formats(
- stream_url, display_id, f4m_id=stream_kind, fatal=False))
-
- video_id = video.get('id') or display_id
- description = video.get('description', {}).get('detailed')
- duration = int_or_none(video.get('duration'))
-
- series = video.get('show', {}).get('name')
- season_number = int_or_none(video.get('season', {}).get('number'))
- episode_number = int_or_none(video.get('episodeNumber'))
-
- tags = video.get('tags')
- age_limit = parse_age_limit(video.get('parental', {}).get('rating'))
-
- subtitles = {}
- captions = stream.get('captions')
- if isinstance(captions, list):
- for caption in captions:
- subtitle_url = url_or_none(caption.get('fileUrl'))
- if not subtitle_url or not subtitle_url.startswith('http'):
- continue
- lang = caption.get('fileLang', 'en')
- ext = determine_ext(subtitle_url)
- subtitles.setdefault(lang, []).append({
- 'url': subtitle_url,
- 'ext': 'ttml' if ext == 'xml' else ext,
- })
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'series': series,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'tags': tags,
- 'age_limit': age_limit,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
-
-class DiscoveryGoIE(DiscoveryGoBaseIE):
- _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
- _GEO_COUNTRIES = ['US']
- _TEST = {
- 'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
- 'info_dict': {
- 'id': '58c167d86b66d12f2addeb01',
- 'ext': 'mp4',
- 'title': 'Reaper Madness',
- 'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
- 'duration': 2519,
- 'series': 'Bering Sea Gold',
- 'season_number': 8,
- 'episode_number': 6,
- 'age_limit': 14,
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- container = extract_attributes(
- self._search_regex(
- r'(<div[^>]+class=["\']video-player-container[^>]+>)',
- webpage, 'video container'))
-
- video = self._parse_json(
- container.get('data-video') or container.get('data-json'),
- display_id)
-
- stream = video.get('stream')
-
- return self._extract_video_info(video, stream, display_id)
-
-
-class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
- _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
- _TEST = {
- 'url': 'https://www.discoverygo.com/bering-sea-gold/',
- 'info_dict': {
- 'id': 'bering-sea-gold',
- 'title': 'Bering Sea Gold',
- 'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
- },
- 'playlist_mincount': 6,
- }
-
- @classmethod
- def suitable(cls, url):
- return False if DiscoveryGoIE.suitable(url) else super().suitable(url)
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- entries = []
- for mobj in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage):
- data = self._parse_json(
- mobj.group('json'), display_id,
- transform_source=unescapeHTML, fatal=False)
- if not isinstance(data, dict) or data.get('type') != 'episode':
- continue
- episode_url = data.get('socialUrl')
- if not episode_url:
- continue
- entries.append(self.url_result(
- episode_url, ie=DiscoveryGoIE.ie_key(),
- video_id=data.get('id')))
-
- return self.playlist_result(
- entries, display_id,
- remove_end(self._og_search_title(
- webpage, fatal=False), ' | Discovery GO'),
- self._og_search_description(webpage))
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index 48eae10..e9f9357 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -346,8 +346,16 @@ class HGTVDeIE(DPlayBaseIE):
class DiscoveryPlusBaseIE(DPlayBaseIE):
+ """Subclasses must set _PRODUCT, _DISCO_API_PARAMS"""
+
+ _DISCO_CLIENT_VER = '27.43.0'
+
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
- headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6'
+ headers.update({
+ 'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
+ 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:{self._DISCO_CLIENT_VER}',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
def _download_video_playback_info(self, disco_base, video_id, headers):
return self._download_json(
@@ -368,6 +376,26 @@ class DiscoveryPlusBaseIE(DPlayBaseIE):
class GoDiscoveryIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:go\.)?discovery\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
+ 'url': 'https://go.discovery.com/video/in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister',
+ 'info_dict': {
+ 'id': '5352642',
+ 'display_id': 'in-the-eye-of-the-storm-discovery-atve-us/trapped-in-a-twister',
+ 'ext': 'mp4',
+ 'title': 'Trapped in a Twister',
+ 'description': 'Twisters destroy Midwest towns, trapping spotters in the eye of the storm.',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'series': 'In The Eye Of The Storm',
+ 'duration': 2490.237,
+ 'upload_date': '20240715',
+ 'timestamp': 1721008800,
+ 'tags': [],
+ 'creators': ['Discovery'],
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2024/07/10/5e39637d-cabf-3ab3-8e9a-f4e9d37bc036.jpeg',
+ },
+ }, {
'url': 'https://go.discovery.com/video/dirty-jobs-discovery-atve-us/rodbuster-galvanizer',
'info_dict': {
'id': '4164906',
@@ -395,6 +423,26 @@ class GoDiscoveryIE(DiscoveryPlusBaseIE):
class TravelChannelIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:watch\.)?travelchannel\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
+ 'url': 'https://watch.travelchannel.com/video/the-dead-files-travel-channel/protect-the-children',
+ 'info_dict': {
+ 'id': '4710177',
+ 'display_id': 'the-dead-files-travel-channel/protect-the-children',
+ 'ext': 'mp4',
+ 'title': 'Protect the Children',
+ 'description': 'An evil presence threatens an Ohio woman\'s children and marriage.',
+ 'season_number': 14,
+ 'season': 'Season 14',
+ 'episode_number': 10,
+ 'episode': 'Episode 10',
+ 'series': 'The Dead Files',
+ 'duration': 2550.481,
+ 'timestamp': 1664510400,
+ 'upload_date': '20220930',
+ 'tags': [],
+ 'creators': ['Travel Channel'],
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/17/5e45eace-de5d-343a-9293-f400a2aa77d5.jpeg',
+ },
+ }, {
'url': 'https://watch.travelchannel.com/video/ghost-adventures-travel-channel/ghost-train-of-ely',
'info_dict': {
'id': '2220256',
@@ -422,6 +470,26 @@ class TravelChannelIE(DiscoveryPlusBaseIE):
class CookingChannelIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:watch\.)?cookingchanneltv\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
+ 'url': 'https://watch.cookingchanneltv.com/video/bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson',
+ 'info_dict': {
+ 'id': '5350005',
+ 'ext': 'mp4',
+ 'display_id': 'bobbys-triple-threat-food-network-atve-us/titans-vs-marcus-samuelsson',
+ 'title': 'Titans vs Marcus Samuelsson',
+ 'description': 'Marcus Samuelsson throws his legendary global tricks at the Titans.',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'season_number': 3,
+ 'season': 'Season 3',
+ 'series': 'Bobby\'s Triple Threat',
+ 'duration': 2520.851,
+ 'upload_date': '20240710',
+ 'timestamp': 1720573200,
+ 'tags': [],
+ 'creators': ['Food Network'],
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2024/07/04/529cd095-27ec-35c5-84e9-90ebd3e5d2da.jpeg',
+ },
+ }, {
'url': 'https://watch.cookingchanneltv.com/video/carnival-eats-cooking-channel/the-postman-always-brings-rice-2348634',
'info_dict': {
'id': '2348634',
@@ -449,6 +517,22 @@ class CookingChannelIE(DiscoveryPlusBaseIE):
class HGTVUsaIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:watch\.)?hgtv\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
+ 'url': 'https://watch.hgtv.com/video/flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip',
+ 'info_dict': {
+ 'id': '5025585',
+ 'display_id': 'flip-or-flop-the-final-flip-hgtv-atve-us/flip-or-flop-the-final-flip',
+ 'ext': 'mp4',
+ 'title': 'Flip or Flop: The Final Flip',
+ 'description': 'Tarek and Christina are going their separate ways after one last flip!',
+ 'series': 'Flip or Flop: The Final Flip',
+ 'duration': 2580.644,
+ 'upload_date': '20231101',
+ 'timestamp': 1698811200,
+ 'tags': [],
+ 'creators': ['HGTV'],
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2022/11/27/455caa6c-1462-3f14-b63d-a026d7a5e6d3.jpeg',
+ },
+ }, {
'url': 'https://watch.hgtv.com/video/home-inspector-joe-hgtv-atve-us/this-mold-house',
'info_dict': {
'id': '4289736',
@@ -476,6 +560,26 @@ class HGTVUsaIE(DiscoveryPlusBaseIE):
class FoodNetworkIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:watch\.)?foodnetwork\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
+ 'url': 'https://watch.foodnetwork.com/video/guys-grocery-games-food-network/wild-in-the-aisles',
+ 'info_dict': {
+ 'id': '2152549',
+ 'display_id': 'guys-grocery-games-food-network/wild-in-the-aisles',
+ 'ext': 'mp4',
+ 'title': 'Wild in the Aisles',
+ 'description': 'The chefs make spaghetti and meatballs with "Out of Stock" ingredients.',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'series': 'Guy\'s Grocery Games',
+ 'tags': [],
+ 'creators': ['Food Network'],
+ 'duration': 2520.651,
+ 'upload_date': '20230623',
+ 'timestamp': 1687492800,
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2022/06/15/37fb5333-cad2-3dbb-af7c-c20ec77c89c6.jpeg',
+ },
+ }, {
'url': 'https://watch.foodnetwork.com/video/kids-baking-championship-food-network/float-like-a-butterfly',
'info_dict': {
'id': '4116449',
@@ -503,6 +607,26 @@ class FoodNetworkIE(DiscoveryPlusBaseIE):
class DestinationAmericaIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?destinationamerica\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
+ 'url': 'https://www.destinationamerica.com/video/bbq-pit-wars-destination-america/smoke-on-the-water',
+ 'info_dict': {
+ 'id': '2218409',
+ 'display_id': 'bbq-pit-wars-destination-america/smoke-on-the-water',
+ 'ext': 'mp4',
+ 'title': 'Smoke on the Water',
+ 'description': 'The pitmasters head to Georgia for the Smoke on the Water BBQ Festival.',
+ 'season_number': 2,
+ 'season': 'Season 2',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'series': 'BBQ Pit Wars',
+ 'tags': [],
+ 'creators': ['Destination America'],
+ 'duration': 2614.878,
+ 'upload_date': '20230623',
+ 'timestamp': 1687492800,
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/c0f8e85d-9a10-3e6f-8e43-f6faafa81ba2.jpeg',
+ },
+ }, {
'url': 'https://www.destinationamerica.com/video/alaska-monsters-destination-america-atve-us/central-alaskas-bigfoot',
'info_dict': {
'id': '4210904',
@@ -530,6 +654,26 @@ class DestinationAmericaIE(DiscoveryPlusBaseIE):
class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?investigationdiscovery\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
+ 'url': 'https://www.investigationdiscovery.com/video/deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca',
+ 'info_dict': {
+ 'id': '5341132',
+ 'display_id': 'deadly-influence-the-social-media-murders-investigation-discovery-atve-us/rip-bianca',
+ 'ext': 'mp4',
+ 'title': 'RIP Bianca',
+ 'description': 'A teenage influencer discovers an online world of threat, harm and danger.',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'episode_number': 3,
+ 'episode': 'Episode 3',
+ 'series': 'Deadly Influence: The Social Media Murders',
+ 'creators': ['Investigation Discovery'],
+ 'tags': [],
+ 'duration': 2490.888,
+ 'upload_date': '20240618',
+ 'timestamp': 1718672400,
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2024/06/15/b567c774-9e44-3c6c-b0ba-db860a73e812.jpeg',
+ },
+ }, {
'url': 'https://www.investigationdiscovery.com/video/unmasked-investigation-discovery/the-killer-clown',
'info_dict': {
'id': '2139409',
@@ -557,6 +701,26 @@ class InvestigationDiscoveryIE(DiscoveryPlusBaseIE):
class AmHistoryChannelIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?ahctv\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
+ 'url': 'https://www.ahctv.com/video/blood-and-fury-americas-civil-war-ahc/battle-of-bull-run',
+ 'info_dict': {
+ 'id': '2139199',
+ 'display_id': 'blood-and-fury-americas-civil-war-ahc/battle-of-bull-run',
+ 'ext': 'mp4',
+ 'title': 'Battle of Bull Run',
+ 'description': 'Two untested armies clash in the first real battle of the Civil War.',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'series': 'Blood and Fury: America\'s Civil War',
+ 'duration': 2612.509,
+ 'upload_date': '20220923',
+ 'timestamp': 1663905600,
+ 'creators': ['AHC'],
+ 'tags': [],
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2020/05/11/4af61bd7-d705-3108-82c4-1a6e541e20fa.jpeg',
+ },
+ }, {
'url': 'https://www.ahctv.com/video/modern-sniper-ahc/army',
'info_dict': {
'id': '2309730',
@@ -584,6 +748,26 @@ class AmHistoryChannelIE(DiscoveryPlusBaseIE):
class ScienceChannelIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
+ 'url': 'https://www.sciencechannel.com/video/spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets',
+ 'info_dict': {
+ 'id': '2347335',
+ 'display_id': 'spaces-deepest-secrets-science-atve-us/mystery-of-the-dead-planets',
+ 'ext': 'mp4',
+ 'title': 'Mystery of the Dead Planets',
+ 'description': 'Astronomers unmask the truly destructive nature of the cosmos.',
+ 'season_number': 7,
+ 'season': 'Season 7',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'series': 'Space\'s Deepest Secrets',
+ 'duration': 2524.989,
+ 'upload_date': '20230128',
+ 'timestamp': 1674882000,
+ 'creators': ['Science'],
+ 'tags': [],
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/30/3796829d-aead-3f9a-bd8d-e49048b3cdca.jpeg',
+ },
+ }, {
'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
'info_dict': {
'id': '2842849',
@@ -608,36 +792,29 @@ class ScienceChannelIE(DiscoveryPlusBaseIE):
}
-class DIYNetworkIE(DiscoveryPlusBaseIE):
- _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX
+class DiscoveryLifeIE(DiscoveryPlusBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
- 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
+ 'url': 'https://www.discoverylife.com/video/er-files-discovery-life-atve-us/sweet-charity',
'info_dict': {
- 'id': '2309730',
- 'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas',
+ 'id': '2347614',
+ 'display_id': 'er-files-discovery-life-atve-us/sweet-charity',
'ext': 'mp4',
- 'title': 'Bringing Beach Life to Texas',
- 'description': 'The Pool Kings give a family a day at the beach in their own backyard.',
- 'season_number': 10,
- 'episode_number': 2,
+ 'title': 'Sweet Charity',
+ 'description': 'The staff at Charity Hospital treat a serious foot infection.',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'series': 'ER Files',
+ 'duration': 2364.261,
+ 'upload_date': '20230721',
+ 'timestamp': 1689912000,
+ 'creators': ['Discovery Life'],
+ 'tags': [],
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/16/4b6f0124-360b-3546-b6a4-5552db886b86.jpeg',
},
- 'skip': 'Available for Premium users',
}, {
- 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
- 'only_matching': True,
- }]
-
- _PRODUCT = 'diy'
- _DISCO_API_PARAMS = {
- 'disco_host': 'us1-prod-direct.watch.diynetwork.com',
- 'realm': 'go',
- 'country': 'us',
- }
-
-
-class DiscoveryLifeIE(DiscoveryPlusBaseIE):
- _VALID_URL = r'https?://(?:www\.)?discoverylife\.com/video' + DPlayBaseIE._PATH_REGEX
- _TESTS = [{
'url': 'https://www.discoverylife.com/video/surviving-death-discovery-life-atve-us/bodily-trauma',
'info_dict': {
'id': '2218238',
@@ -665,6 +842,26 @@ class DiscoveryLifeIE(DiscoveryPlusBaseIE):
class AnimalPlanetIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
+ 'url': 'https://www.animalplanet.com/video/mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru',
+ 'info_dict': {
+ 'id': '4650835',
+ 'display_id': 'mysterious-creatures-with-forrest-galante-animal-planet-atve-us/the-demon-of-peru',
+ 'ext': 'mp4',
+ 'title': 'The Demon of Peru',
+ 'description': 'In Peru, a farming village is being terrorized by a “man-like beast.”',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'episode_number': 4,
+ 'episode': 'Episode 4',
+ 'series': 'Mysterious Creatures with Forrest Galante',
+ 'duration': 2490.488,
+ 'upload_date': '20230111',
+ 'timestamp': 1673413200,
+ 'creators': ['Animal Planet'],
+ 'tags': [],
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2022/03/01/6dbaa833-9a2e-3fee-9381-c19eddf67c0c.jpeg',
+ },
+ }, {
'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
'info_dict': {
'id': '3338923',
@@ -692,6 +889,26 @@ class AnimalPlanetIE(DiscoveryPlusBaseIE):
class TLCIE(DiscoveryPlusBaseIE):
_VALID_URL = r'https?://(?:go\.)?tlc\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
+ 'url': 'https://go.tlc.com/video/90-day-the-last-resort-tlc-atve-us/the-last-chance',
+ 'info_dict': {
+ 'id': '5186422',
+ 'display_id': '90-day-the-last-resort-tlc-atve-us/the-last-chance',
+ 'ext': 'mp4',
+ 'title': 'The Last Chance',
+ 'description': 'Infidelity shakes Kalani and Asuelu\'s world, and Angela threatens divorce.',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'series': '90 Day: The Last Resort',
+ 'duration': 5123.91,
+ 'upload_date': '20230815',
+ 'timestamp': 1692061200,
+ 'creators': ['TLC'],
+ 'tags': [],
+ 'thumbnail': 'https://us1-prod-images.disco-api.com/2023/08/08/0ee367e2-ac76-334d-bf23-dbf796696a24.jpeg',
+ },
+ }, {
'url': 'https://go.tlc.com/video/my-600-lb-life-tlc/melissas-story-part-1',
'info_dict': {
'id': '2206540',
@@ -716,93 +933,8 @@ class TLCIE(DiscoveryPlusBaseIE):
}
-class MotorTrendIE(DiscoveryPlusBaseIE):
- _VALID_URL = r'https?://(?:watch\.)?motortrend\.com/video' + DPlayBaseIE._PATH_REGEX
- _TESTS = [{
- 'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
- 'info_dict': {
- 'id': '"4859182"',
- 'display_id': 'double-dakotas',
- 'ext': 'mp4',
- 'title': 'Double Dakotas',
- 'description': 'Tylers buy-one-get-one Dakota deal has the Wizard pulling double duty.',
- 'season_number': 2,
- 'episode_number': 3,
- },
- 'skip': 'Available for Premium users',
- }, {
- 'url': 'https://watch.motortrend.com/video/car-issues-motortrend-atve-us/double-dakotas',
- 'only_matching': True,
- }]
-
- _PRODUCT = 'vel'
- _DISCO_API_PARAMS = {
- 'disco_host': 'us1-prod-direct.watch.motortrend.com',
- 'realm': 'go',
- 'country': 'us',
- }
-
-
-class MotorTrendOnDemandIE(DiscoveryPlusBaseIE):
- _VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX
- _TESTS = [{
- 'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784',
- 'info_dict': {
- 'id': '37699',
- 'display_id': 'wheelstanding-dump-truck-stubby-bobs-comeback/37699',
- 'ext': 'mp4',
- 'title': 'Wheelstanding Dump Truck! Stubby Bob’s Comeback',
- 'description': 'md5:996915abe52a1c3dfc83aecea3cce8e7',
- 'season_number': 5,
- 'episode_number': 52,
- 'episode': 'Episode 52',
- 'season': 'Season 5',
- 'thumbnail': r're:^https?://.+\.jpe?g$',
- 'timestamp': 1388534401,
- 'duration': 1887.345,
- 'creator': 'Originals',
- 'series': 'Roadkill',
- 'upload_date': '20140101',
- 'tags': [],
- },
- }, {
- 'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/',
- 'info_dict': {
- 'id': '4922860',
- 'ext': 'mp4',
- 'title': 'Roadworthy Rescues | Teaser Trailer',
- 'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.',
- 'display_id': 'roadworthy-rescues-teaser-trailer/4922860',
- 'creator': 'Originals',
- 'series': 'Roadworthy Rescues',
- 'thumbnail': r're:^https?://.+\.jpe?g$',
- 'upload_date': '20220907',
- 'timestamp': 1662523200,
- 'duration': 1066.356,
- 'tags': [],
- },
- }, {
- 'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439',
- 'only_matching': True,
- }]
-
- _PRODUCT = 'MTOD'
- _DISCO_API_PARAMS = {
- 'disco_host': 'us1-prod-direct.motortrendondemand.com',
- 'realm': 'motortrend',
- 'country': 'us',
- }
-
- def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
- headers.update({
- 'x-disco-params': f'realm={realm}',
- 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:4.39.1-gi1',
- 'Authorization': self._get_auth(disco_base, display_id, realm),
- })
-
-
class DiscoveryPlusIE(DiscoveryPlusBaseIE):
- _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport)?' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
'info_dict': {
@@ -823,14 +955,42 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE):
}, {
'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers',
'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review',
+ 'only_matching': True,
}]
- _PRODUCT = 'dplus_us'
- _DISCO_API_PARAMS = {
- 'disco_host': 'us1-prod-direct.discoveryplus.com',
- 'realm': 'go',
- 'country': 'us',
- }
+ _PRODUCT = None
+ _DISCO_API_PARAMS = None
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
+ 'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
+
+ def _real_extract(self, url):
+ video_id, country = self._match_valid_url(url).group('id', 'country')
+ if not country:
+ country = 'us'
+
+ self._PRODUCT = f'dplus_{country}'
+
+ if country in ('br', 'ca', 'us'):
+ self._DISCO_API_PARAMS = {
+ 'disco_host': 'us1-prod-direct.discoveryplus.com',
+ 'realm': 'go',
+ 'country': country,
+ }
+ else:
+ self._DISCO_API_PARAMS = {
+ 'disco_host': 'eu1-prod-direct.discoveryplus.com',
+ 'realm': 'dplay',
+ 'country': country,
+ }
+
+ return self._get_disco_api_info(url, video_id, **self._DISCO_API_PARAMS)
class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE):
@@ -993,7 +1153,7 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
'only_matching': True,
}]
- _PRODUCT = 'dplus_us'
+ _PRODUCT = 'dplus_it'
_DISCO_API_PARAMS = {
'disco_host': 'eu1-prod-direct.discoveryplus.com',
'realm': 'dplay',
@@ -1002,8 +1162,8 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE):
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
headers.update({
- 'x-disco-params': f'realm={realm}',
- 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6',
+ 'x-disco-params': f'realm={realm},siteLookupKey={self._PRODUCT}',
+ 'x-disco-client': f'WEB:UNKNOWN:dplus_us:{self._DISCO_CLIENT_VER}',
'Authorization': self._get_auth(disco_base, display_id, realm),
})
@@ -1044,39 +1204,3 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE):
_SHOW_STR = 'show'
_INDEX = 4
_VIDEO_IE = DiscoveryPlusIndiaIE
-
-
-class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE):
- _VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)'
- _TESTS = [{
- 'url': 'https://plus.globalcyclingnetwork.com/watch/1397691',
- 'info_dict': {
- 'id': '1397691',
- 'ext': 'mp4',
- 'title': 'The Athertons: Mountain Biking\'s Fastest Family',
- 'description': 'md5:75a81937fcd8b989eec6083a709cd837',
- 'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png',
- 'series': 'gcn',
- 'creator': 'Gcn',
- 'upload_date': '20210309',
- 'timestamp': 1615248000,
- 'duration': 2531.0,
- 'tags': [],
- },
- 'skip': 'Subscription required',
- 'params': {'skip_download': 'm3u8'},
- }]
-
- _PRODUCT = 'web'
- _DISCO_API_PARAMS = {
- 'disco_host': 'disco-api-prod.globalcyclingnetwork.com',
- 'realm': 'gcn',
- 'country': 'us',
- }
-
- def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
- headers.update({
- 'x-disco-params': f'realm={realm}',
- 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2',
- 'Authorization': self._get_auth(disco_base, display_id, realm),
- })
diff --git a/yt_dlp/extractor/epidemicsound.py b/yt_dlp/extractor/epidemicsound.py
index 0d81b11..75b0f05 100644
--- a/yt_dlp/extractor/epidemicsound.py
+++ b/yt_dlp/extractor/epidemicsound.py
@@ -2,6 +2,7 @@ from .common import InfoExtractor
from ..utils import (
float_or_none,
int_or_none,
+ join_nonempty,
orderedSet,
parse_iso8601,
parse_qs,
@@ -13,7 +14,7 @@ from ..utils import (
class EpidemicSoundIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
+ _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/(?:(?P<sfx>sound-effects/tracks)|track)/(?P<id>[0-9a-zA-Z-]+)'
_TESTS = [{
'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
@@ -47,6 +48,20 @@ class EpidemicSoundIE(InfoExtractor):
'release_timestamp': 1700535606,
'release_date': '20231121',
},
+ }, {
+ 'url': 'https://www.epidemicsound.com/sound-effects/tracks/2f02f54b-9faa-4daf-abac-1cfe9e9cef69/',
+ 'md5': '35d7cf05bd8b614a84f0495a05de9388',
+ 'info_dict': {
+ 'id': '208931',
+ 'ext': 'mp3',
+ 'upload_date': '20240603',
+ 'timestamp': 1717436529,
+ 'categories': ['appliance'],
+ 'display_id': '6b2NXLURPr',
+ 'duration': 1.0,
+ 'title': 'Oven, Grill, Door Open 01',
+ 'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
+ },
}]
@staticmethod
@@ -77,8 +92,10 @@ class EpidemicSoundIE(InfoExtractor):
return f
def _real_extract(self, url):
- video_id = self._match_id(url)
- json_data = self._download_json(f'https://www.epidemicsound.com/json/track/{video_id}', video_id)
+ video_id, is_sfx = self._match_valid_url(url).group('id', 'sfx')
+ json_data = self._download_json(join_nonempty(
+ 'https://www.epidemicsound.com/json/track',
+ is_sfx and 'kosmos-id', video_id, delim='/'), video_id)
thumbnails = traverse_obj(json_data, [('imageUrl', 'cover')])
thumb_base_url = traverse_obj(json_data, ('coverArt', 'baseUrl', {url_or_none}))
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 3b8e1e9..04cffaa 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -43,6 +43,7 @@ from ..utils import (
xpath_text,
xpath_with_ns,
)
+from ..utils._utils import _UnsafeExtensionError
class GenericIE(InfoExtractor):
@@ -2446,9 +2447,13 @@ class GenericIE(InfoExtractor):
if not is_html(first_bytes):
self.report_warning(
'URL could be a direct video link, returning it as such.')
+ ext = determine_ext(url)
+ if ext not in _UnsafeExtensionError.ALLOWED_EXTENSIONS:
+ ext = 'unknown_video'
info_dict.update({
'direct': True,
'url': url,
+ 'ext': ext,
})
return info_dict
diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py
index 726fe41..72e89c3 100644
--- a/yt_dlp/extractor/picarto.py
+++ b/yt_dlp/extractor/picarto.py
@@ -5,6 +5,7 @@ from ..utils import (
ExtractorError,
str_or_none,
traverse_obj,
+ update_url,
)
@@ -43,15 +44,16 @@ class PicartoIE(InfoExtractor):
url
}
}''' % (channel_id, channel_id), # noqa: UP031
- })['data']
+ }, headers={'Accept': '*/*', 'Content-Type': 'application/json'})['data']
metadata = data['channel']
if metadata.get('online') == 0:
raise ExtractorError('Stream is offline', expected=True)
title = metadata['title']
- cdn_data = self._download_json(
- data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
+ cdn_data = self._download_json(''.join((
+ update_url(data['getLoadBalancerUrl']['url'], scheme='https'),
+ '/stream/json_', metadata['stream_name'], '.js')),
channel_id, 'Downloading load balancing info')
formats = []
@@ -99,10 +101,10 @@ class PicartoVodIE(InfoExtractor):
},
'skip': 'The VOD does not exist',
}, {
- 'url': 'https://picarto.tv/ArtofZod/videos/772650',
- 'md5': '00067a0889f1f6869cc512e3e79c521b',
+ 'url': 'https://picarto.tv/ArtofZod/videos/771008',
+ 'md5': 'abef5322f2700d967720c4c6754b2a34',
'info_dict': {
- 'id': '772650',
+ 'id': '771008',
'ext': 'mp4',
'title': 'Art of Zod - Drawing and Painting',
'thumbnail': r're:^https?://.*\.jpg',
@@ -131,7 +133,7 @@ class PicartoVodIE(InfoExtractor):
}}
}}
}}''',
- })['data']['video']
+ }, headers={'Accept': '*/*', 'Content-Type': 'application/json'})['data']['video']
file_name = data['file_name']
netloc = urllib.parse.urlparse(data['video_recording_image_url']).netloc
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index afb512d..4f8d964 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -871,7 +871,7 @@ class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE):
'id': '30909869',
'title': 'neilcic',
},
- 'playlist_mincount': 23,
+ 'playlist_mincount': 22,
}]
def _real_extract(self, url):
@@ -880,7 +880,7 @@ class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE):
self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS)
return self._extract_playlist(
- f'{self._API_V2_BASE}stream/users/{user["id"]}', str(user['id']), user.get('username'))
+ f'{self._API_V2_BASE}users/{user["id"]}/tracks', str(user['id']), user.get('username'))
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index c3505b1..aa1dcec 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -1458,9 +1458,11 @@ class TikTokLiveIE(TikTokBaseIE):
if webpage:
data = self._get_sigi_state(webpage, uploader or room_id)
- room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False)
- or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
- or room_id)
+ room_id = (
+ traverse_obj(data, ((
+ ('LiveRoom', 'liveRoomUserInfo', 'user'),
+ ('UserModule', 'users', ...)), 'roomId', {str}, any))
+ or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=room_id))
uploader = uploader or traverse_obj(
data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'),
('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str)
diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py
index 52ff230..953eb77 100644
--- a/yt_dlp/extractor/tv5mondeplus.py
+++ b/yt_dlp/extractor/tv5mondeplus.py
@@ -96,7 +96,7 @@ class TV5MondePlusIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
+ webpage = self._download_webpage(url, display_id, impersonate=True)
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
self.raise_geo_restricted(countries=['FR'])
@@ -122,8 +122,9 @@ class TV5MondePlusIE(InfoExtractor):
if not token:
continue
deferred_json = self._download_json(
- f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true', display_id,
- note='Downloading deferred info', headers={'Authorization': f'Bearer {token}'}, fatal=False)
+ f'https://api.tv5monde.com/player/asset/{d_param}/resolve?condenseKS=true',
+ display_id, 'Downloading deferred info', fatal=False, impersonate=True,
+ headers={'Authorization': f'Bearer {token}'})
v_url = traverse_obj(deferred_json, (0, 'url', {url_or_none}))
if not v_url:
continue
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 1c0a70d..53aca38 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -270,7 +270,7 @@ def build_innertube_clients():
THIRD_PARTY = {
'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
}
- BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
+ BASE_CLIENTS = ('ios', 'web', 'tv', 'mweb', 'android')
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
@@ -1294,6 +1294,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
+ _POTOKEN_EXPERIMENTS = ('51217476', '51217102')
_GEO_BYPASS = False
@@ -3142,7 +3143,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
- func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
+ func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09')
jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode)
@@ -3701,8 +3702,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return pr_id
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
- initial_pr = None
+ initial_pr = ignore_initial_response = None
if webpage:
+ if 'web' in clients:
+ experiments = traverse_obj(master_ytcfg, (
+ 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {lambda x: x.split(',')}, ...))
+ if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
+ self.report_warning(
+ 'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response')
+ ignore_initial_response = True
initial_pr = self._search_json(
self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
@@ -3732,8 +3740,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
skipped_clients = {}
while clients:
client, base_client, variant = _split_innertube_client(clients.pop())
- player_ytcfg = master_ytcfg if client == 'web' else {}
- if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
+ player_ytcfg = {}
+ if client == 'web':
+ player_ytcfg = self._get_default_ytcfg() if ignore_initial_response else master_ytcfg
+ elif 'configs' not in self._configuration_arg('player_skip'):
player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
@@ -3746,11 +3756,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_url = self._download_player_url(video_id)
tried_iframe_fallback = True
- try:
- pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
- client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
- except ExtractorError as e:
- self.report_warning(e)
+ pr = initial_pr if client == 'web' and not ignore_initial_response else None
+ for retry in self.RetryManager(fatal=False):
+ try:
+ pr = pr or self._extract_player_response(
+ client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
+ player_url if require_js_player else None, initial_pr, smuggled_data)
+ except ExtractorError as e:
+ self.report_warning(e)
+ break
+ experiments = traverse_obj(pr, (
+ 'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK',
+ 'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ...))
+ if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
+ pr = None
+ retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True)
+ if not pr:
continue
if pr_id := self._invalid_player_response(pr, video_id):