summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp')
-rw-r--r-- yt_dlp/extractor/_extractors.py 7
-rw-r--r-- yt_dlp/extractor/abematv.py 3
-rw-r--r-- yt_dlp/extractor/cbc.py 269
-rw-r--r-- yt_dlp/extractor/common.py 8
-rw-r--r-- yt_dlp/extractor/dplay.py 5
-rw-r--r-- yt_dlp/extractor/kick.py 205
-rw-r--r-- yt_dlp/extractor/learningonscreen.py 78
-rw-r--r-- yt_dlp/extractor/mediaklikk.py 4
-rw-r--r-- yt_dlp/extractor/mlb.py 20
-rw-r--r-- yt_dlp/extractor/olympics.py 106
-rw-r--r-- yt_dlp/extractor/tva.py 72
-rw-r--r-- yt_dlp/extractor/tver.py 26
-rw-r--r-- yt_dlp/extractor/unsupported.py 4
-rw-r--r-- yt_dlp/extractor/vimeo.py 24
-rw-r--r-- yt_dlp/extractor/youtube.py 279
-rw-r--r-- yt_dlp/utils/_utils.py 2
-rw-r--r-- yt_dlp/version.py 6
17 files changed, 813 insertions, 305 deletions
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index d2140bc..9b73fcd 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -939,6 +939,7 @@ from .khanacademy import (
KhanAcademyUnitIE,
)
from .kick import (
+ KickClipIE,
KickIE,
KickVODIE,
)
@@ -986,6 +987,7 @@ from .lcp import (
LcpIE,
LcpPlayIE,
)
+from .learningonscreen import LearningOnScreenIE
from .lecture2go import Lecture2GoIE
from .lecturio import (
LecturioCourseIE,
@@ -2169,10 +2171,7 @@ from .tv5unis import (
TV5UnisVideoIE,
)
from .tv24ua import TV24UAVideoIE
-from .tva import (
- TVAIE,
- QubIE,
-)
+from .tva import TVAIE
from .tvanouvelles import (
TVANouvellesArticleIE,
TVANouvellesIE,
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 2611c6f..66ab083 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -377,8 +377,7 @@ class AbemaTVIE(AbemaTVBaseIE):
f'https://api.abema.io/v1/video/programs/{video_id}', video_id,
note='Checking playability',
headers=headers)
- ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType'))
- if 3 not in ondemand_types:
+ if not traverse_obj(api_response, ('label', 'free', {bool})):
# cannot acquire decryption key for these streams
self.report_warning('This is a premium-only stream')
availability = 'premium_only'
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index 1522b08..373c9d2 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -1,4 +1,5 @@
import base64
+import functools
import json
import re
import time
@@ -6,17 +7,24 @@ import urllib.parse
import xml.etree.ElementTree
from .common import InfoExtractor
+from ..networking import HEADRequest
from ..utils import (
ExtractorError,
+ float_or_none,
int_or_none,
join_nonempty,
js_to_json,
+ mimetype2ext,
orderedSet,
parse_iso8601,
+ replace_extension,
smuggle_url,
strip_or_none,
traverse_obj,
try_get,
+ update_url,
+ url_basename,
+ url_or_none,
)
@@ -149,6 +157,7 @@ class CBCIE(InfoExtractor):
class CBCPlayerIE(InfoExtractor):
IE_NAME = 'cbc.ca:player'
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
+ _GEO_COUNTRIES = ['CA']
_TESTS = [{
'url': 'http://www.cbc.ca/player/play/2683190193',
'md5': '64d25f841ddf4ddb28a235338af32e2c',
@@ -172,21 +181,20 @@ class CBCPlayerIE(InfoExtractor):
'description': 'md5:dd3b692f0a139b0369943150bd1c46a9',
'timestamp': 1425704400,
'upload_date': '20150307',
- 'uploader': 'CBCC-NEW',
- 'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
+ 'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
'chapters': [],
'duration': 494.811,
- 'categories': ['AudioMobile/All in a Weekend Montreal'],
- 'tags': 'count:8',
+ 'categories': ['All in a Weekend Montreal'],
+ 'tags': 'count:11',
'location': 'Quebec',
'series': 'All in a Weekend Montreal',
'season': 'Season 2015',
'season_number': 2015,
'media_type': 'Excerpt',
+ 'genres': ['Other'],
},
}, {
'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062',
- 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
'info_dict': {
'id': '2164402062',
'ext': 'mp4',
@@ -194,107 +202,168 @@ class CBCPlayerIE(InfoExtractor):
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
'timestamp': 1320410746,
'upload_date': '20111104',
- 'uploader': 'CBCC-NEW',
- 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
+ 'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
'chapters': [],
'duration': 186.867,
'series': 'CBC News: Windsor at 6:00',
- 'categories': ['News/Canada/Windsor'],
+ 'categories': ['Windsor'],
'location': 'Windsor',
- 'tags': ['cancer'],
- 'creators': ['Allison Johnson'],
+ 'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
'media_type': 'Excerpt',
+ 'genres': ['News'],
},
+ 'params': {'skip_download': 'm3u8'},
}, {
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
'url': 'https://www.cbc.ca/player/play/1.2985700',
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
'info_dict': {
- 'id': '2657631896',
+ 'id': '1.2985700',
'ext': 'mp3',
'title': 'CBC Montreal is organizing its first ever community hackathon!',
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
'timestamp': 1425704400,
'upload_date': '20150307',
- 'uploader': 'CBCC-NEW',
- 'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
+ 'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg',
'chapters': [],
'duration': 494.811,
- 'categories': ['AudioMobile/All in a Weekend Montreal'],
- 'tags': 'count:8',
+ 'categories': ['All in a Weekend Montreal'],
+ 'tags': 'count:11',
'location': 'Quebec',
'series': 'All in a Weekend Montreal',
'season': 'Season 2015',
'season_number': 2015,
'media_type': 'Excerpt',
+ 'genres': ['Other'],
},
}, {
'url': 'https://www.cbc.ca/player/play/1.1711287',
- 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
'info_dict': {
- 'id': '2164402062',
+ 'id': '1.1711287',
'ext': 'mp4',
'title': 'Cancer survivor four times over',
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
'timestamp': 1320410746,
'upload_date': '20111104',
- 'uploader': 'CBCC-NEW',
- 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
+ 'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg',
'chapters': [],
'duration': 186.867,
'series': 'CBC News: Windsor at 6:00',
- 'categories': ['News/Canada/Windsor'],
+ 'categories': ['Windsor'],
'location': 'Windsor',
- 'tags': ['cancer'],
- 'creators': ['Allison Johnson'],
+ 'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'],
'media_type': 'Excerpt',
+ 'genres': ['News'],
},
+ 'params': {'skip_download': 'm3u8'},
}, {
# Has subtitles
# These broadcasts expire after ~1 month, can find new test URL here:
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
- 'url': 'https://www.cbc.ca/player/play/1.7159484',
- 'md5': '6ed6cd0fc2ef568d2297ba68a763d455',
+ 'url': 'https://www.cbc.ca/player/play/video/9.6424403',
+ 'md5': '8025909eaffcf0adf59922904def9a5e',
'info_dict': {
- 'id': '2324213316001',
+ 'id': '9.6424403',
'ext': 'mp4',
- 'title': 'The National | School boards sue social media giants',
- 'description': 'md5:4b4db69322fa32186c3ce426da07402c',
- 'timestamp': 1711681200,
- 'duration': 2743.400,
- 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
- 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg',
- 'uploader': 'CBCC-NEW',
+ 'title': 'The National | N.W.T. wildfire emergency',
+ 'description': 'md5:ada33d36d1df69347ed575905bfd496c',
+ 'timestamp': 1718589600,
+ 'duration': 2692.833,
+ 'subtitles': {
+ 'en-US': [{
+ 'name': 'English Captions',
+ 'url': 'https://cbchls.akamaized.net/delivery/news-shows/2024/06/17/NAT_JUN16-00-55-00/NAT_JUN16_cc.vtt',
+ }],
+ },
+ 'thumbnail': 'https://i.cbc.ca/ais/6272b5c6-5e78-4c05-915d-0e36672e33d1,1714756287822/full/max/0/default.jpg',
'chapters': 'count:5',
- 'upload_date': '20240329',
- 'categories': 'count:4',
+ 'upload_date': '20240617',
+ 'categories': ['News', 'The National', 'The National Latest Broadcasts'],
'series': 'The National - Full Show',
- 'tags': 'count:1',
- 'creators': ['News'],
+ 'tags': ['The National'],
'location': 'Canada',
'media_type': 'Full Program',
+ 'genres': ['News'],
},
}, {
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
'md5': '188b96cf6bdcb2540e178a6caa957128',
'info_dict': {
- 'id': '2334524995812',
+ 'id': '1.7194274',
'ext': 'mp4',
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
'timestamp': 1714788791,
'duration': 77.678,
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
- 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
- 'uploader': 'CBCC-NEW',
- 'chapters': 'count:0',
- 'upload_date': '20240504',
+ 'thumbnail': 'https://i.cbc.ca/ais/1.7194274,1717224990425/full/max/0/default.jpg',
+ 'chapters': [],
'categories': 'count:3',
'series': 'The National',
- 'tags': 'count:15',
- 'creators': ['encoder'],
+ 'tags': 'count:17',
+ 'location': 'Canada',
+ 'media_type': 'Excerpt',
+ 'upload_date': '20240504',
+ 'genres': ['News'],
+ },
+ }, {
+ 'url': 'https://www.cbc.ca/player/play/video/9.6427282',
+ 'info_dict': {
+ 'id': '9.6427282',
+ 'ext': 'mp4',
+ 'title': 'Men\'s Soccer - Argentina vs Morocco',
+ 'description': 'Argentina faces Morocco on the football pitch at Saint Etienne Stadium.',
+ 'series': 'CBC Sports',
+ 'media_type': 'Event Coverage',
+ 'thumbnail': 'https://i.cbc.ca/ais/a4c5c0c2-99fa-4bd3-8061-5a63879c1b33,1718828053500/full/max/0/default.jpg',
+ 'timestamp': 1721825400.0,
+ 'upload_date': '20240724',
+ 'duration': 10568.0,
+ 'chapters': [],
+ 'genres': [],
+ 'tags': ['2024 Paris Olympic Games'],
+ 'categories': ['Olympics Summer Soccer', 'Summer Olympics Replays', 'Summer Olympics Soccer Replays'],
'location': 'Canada',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://www.cbc.ca/player/play/video/9.6459530',
+ 'md5': '6c1bb76693ab321a2e99c347a1d5ecbc',
+ 'info_dict': {
+ 'id': '9.6459530',
+ 'ext': 'mp4',
+ 'title': 'Parts of Jasper incinerated as wildfire rages',
+ 'description': 'md5:6f1caa8d128ad3f629257ef5fecf0962',
+ 'series': 'The National',
'media_type': 'Excerpt',
+ 'thumbnail': 'https://i.cbc.ca/ais/507c0086-31a2-494d-96e4-bffb1048d045,1721953984375/full/max/0/default.jpg',
+ 'timestamp': 1721964091.012,
+ 'upload_date': '20240726',
+ 'duration': 952.285,
+ 'chapters': [],
+ 'genres': [],
+ 'tags': 'count:23',
+ 'categories': ['News (FAST)', 'News', 'The National', 'TV News Shows', 'The National '],
+ },
+ }, {
+ 'url': 'https://www.cbc.ca/player/play/video/9.6420651',
+ 'md5': '71a850c2c6ee5e912de169f5311bb533',
+ 'info_dict': {
+ 'id': '9.6420651',
+ 'ext': 'mp4',
+ 'title': 'Is it a breath of fresh air? Measuring air quality in Edmonton',
+ 'description': 'md5:3922b92cc8b69212d739bd9dd095b1c3',
+ 'series': 'CBC News Edmonton',
+ 'media_type': 'Excerpt',
+ 'thumbnail': 'https://i.cbc.ca/ais/73c4ab9c-7ad4-46ee-bb9b-020fdc01c745,1718214547576/full/max/0/default.jpg',
+ 'timestamp': 1718220065.768,
+ 'upload_date': '20240612',
+ 'duration': 286.086,
+ 'chapters': [],
+ 'genres': ['News'],
+ 'categories': ['News', 'Edmonton'],
+ 'tags': 'count:7',
+ 'location': 'Edmonton',
},
}, {
'url': 'cbcplayer:1.7159484',
@@ -307,23 +376,113 @@ class CBCPlayerIE(InfoExtractor):
'only_matching': True,
}]
+ def _parse_param(self, asset_data, name):
+ return traverse_obj(asset_data, ('params', lambda _, v: v['name'] == name, 'value', {str}, any))
+
def _real_extract(self, url):
video_id = self._match_id(url)
- if '.' in video_id:
- webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
- video_id = self._search_json(
- r'window\.__INITIAL_STATE__\s*=', webpage,
- 'initial state', video_id)['video']['currentClip']['mediaId']
+ webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
+ data = self._search_json(
+ r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)['video']['currentClip']
+ assets = traverse_obj(
+ data, ('media', 'assets', lambda _, v: url_or_none(v['key']) and v['type']))
+
+ if not assets and (media_id := traverse_obj(data, ('mediaId', {str}))):
+ # XXX: Deprecated; CBC is migrating off of ThePlatform
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': smuggle_url(
+ f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{media_id}?mbr=true&formats=MPEG4,FLV,MP3', {
+ 'force_smil_url': True,
+ }),
+ 'id': media_id,
+ '_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS
+ }
+
+ is_live = traverse_obj(data, ('media', 'streamType', {str})) == 'Live'
+ formats, subtitles = [], {}
+
+ for sub in traverse_obj(data, ('media', 'textTracks', lambda _, v: url_or_none(v['src']))):
+ subtitles.setdefault(sub.get('language') or 'und', []).append({
+ 'url': sub['src'],
+ 'name': sub.get('label'),
+ })
+
+ for asset in assets:
+ asset_key = asset['key']
+ asset_type = asset['type']
+ if asset_type != 'medianet':
+ self.report_warning(f'Skipping unsupported asset type "{asset_type}": {asset_key}')
+ continue
+ asset_data = self._download_json(asset_key, video_id, f'Downloading {asset_type} JSON')
+ ext = mimetype2ext(self._parse_param(asset_data, 'contentType'))
+ if ext == 'm3u8':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ asset_data['url'], video_id, 'mp4', m3u8_id='hls', live=is_live)
+ formats.extend(fmts)
+ # Avoid slow/error-prone webvtt-over-m3u8 if direct https vtt is available
+ if not subtitles:
+ self._merge_subtitles(subs, target=subtitles)
+ if is_live or not fmts:
+ continue
+ # Check for direct https mp4 format
+ best_video_fmt = traverse_obj(fmts, (
+ lambda _, v: v.get('vcodec') != 'none' and v['tbr'], all,
+ {functools.partial(sorted, key=lambda x: x['tbr'])}, -1, {dict})) or {}
+ base_url = self._search_regex(
+ r'(https?://[^?#]+?/)hdntl=', best_video_fmt.get('url'), 'base url', default=None)
+ if not base_url or '/live/' in base_url:
+ continue
+ mp4_url = base_url + replace_extension(url_basename(best_video_fmt['url']), 'mp4')
+ if self._request_webpage(
+ HEADRequest(mp4_url), video_id, 'Checking for https format',
+ errnote=False, fatal=False):
+ formats.append({
+ **best_video_fmt,
+ 'url': mp4_url,
+ 'format_id': 'https-mp4',
+ 'protocol': 'https',
+ 'manifest_url': None,
+ 'acodec': None,
+ })
+ else:
+ formats.append({
+ 'url': asset_data['url'],
+ 'ext': ext,
+ 'vcodec': 'none' if self._parse_param(asset_data, 'mediaType') == 'audio' else None,
+ })
+
+ chapters = traverse_obj(data, (
+ 'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
+ 'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}),
+ 'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}),
+ 'title': ('name', {str}),
+ }))
+ # Filter out pointless single chapters with start_time==0 and no end_time
+ if len(chapters) == 1 and not (chapters[0].get('start_time') or chapters[0].get('end_time')):
+ chapters = []
return {
- '_type': 'url_transparent',
- 'ie_key': 'ThePlatform',
- 'url': smuggle_url(
- f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{video_id}?mbr=true&formats=MPEG4,FLV,MP3', {
- 'force_smil_url': True,
- }),
+ **traverse_obj(data, {
+ 'title': ('title', {str}),
+ 'description': ('description', {str.strip}),
+ 'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}),
+ 'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}),
+ 'media_type': ('media', 'clipType', {str}),
+ 'series': ('showName', {str}),
+ 'season_number': ('media', 'season', {int_or_none}),
+ 'duration': ('media', 'duration', {float_or_none}, {lambda x: None if is_live else x}),
+ 'location': ('media', 'region', {str}),
+ 'tags': ('tags', ..., 'name', {str}),
+ 'genres': ('media', 'genre', all),
+ 'categories': ('categories', ..., 'name', {str}),
+ }),
'id': video_id,
- '_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'chapters': chapters,
+ 'is_live': is_live,
}
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index f63bd78..187f73e 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3150,7 +3150,7 @@ class InfoExtractor:
})
return formats, subtitles
- def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None):
+ def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None, _headers=None):
def absolute_url(item_url):
return urljoin(base_url, item_url)
@@ -3174,11 +3174,11 @@ class InfoExtractor:
formats = self._extract_m3u8_formats(
full_url, video_id, ext='mp4',
entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
- preference=preference, quality=quality, fatal=False)
+ preference=preference, quality=quality, fatal=False, headers=_headers)
elif ext == 'mpd':
is_plain_url = False
formats = self._extract_mpd_formats(
- full_url, video_id, mpd_id=mpd_id, fatal=False)
+ full_url, video_id, mpd_id=mpd_id, fatal=False, headers=_headers)
else:
is_plain_url = True
formats = [{
@@ -3272,6 +3272,8 @@ class InfoExtractor:
})
for f in media_info['formats']:
f.setdefault('http_headers', {})['Referer'] = base_url
+ if _headers:
+ f['http_headers'].update(_headers)
if media_info['formats'] or media_info['subtitles']:
entries.append(media_info)
return entries
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index e9f9357..cdf84c5 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -934,7 +934,7 @@ class TLCIE(DiscoveryPlusBaseIE):
class DiscoveryPlusIE(DiscoveryPlusBaseIE):
- _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport)?' + DPlayBaseIE._PATH_REGEX
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
'info_dict': {
@@ -958,6 +958,9 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE):
}, {
'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review',
'only_matching': True,
+ }, {
+ 'url': 'https://www.discoveryplus.com/gb/video/olympics/dplus-sport-dplus-sport-sport/rugby-sevens-australia-samoa',
+ 'only_matching': True,
}]
_PRODUCT = None
diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py
index 889548f..1c1b2a1 100644
--- a/yt_dlp/extractor/kick.py
+++ b/yt_dlp/extractor/kick.py
@@ -1,9 +1,14 @@
+import functools
+
from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import (
UserNotLive,
+ determine_ext,
float_or_none,
+ int_or_none,
merge_dicts,
+ parse_iso8601,
str_or_none,
traverse_obj,
unified_timestamp,
@@ -25,104 +30,192 @@ class KickBaseIE(InfoExtractor):
def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
return self._download_json(
- f'https://kick.com/api/v1/{path}', display_id, note=note,
+ f'https://kick.com/api/{path}', display_id, note=note,
headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)
class KickIE(KickBaseIE):
+ IE_NAME = 'kick:live'
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
_TESTS = [{
- 'url': 'https://kick.com/yuppy',
+ 'url': 'https://kick.com/buddha',
'info_dict': {
- 'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21',
+ 'id': '92722911-nopixel-40',
'ext': 'mp4',
'title': str,
'description': str,
- 'channel': 'yuppy',
- 'channel_id': '33538',
- 'uploader': 'Yuppy',
- 'uploader_id': '33793',
- 'upload_date': str,
- 'live_status': 'is_live',
'timestamp': int,
- 'thumbnail': r're:^https?://.*\.jpg',
+ 'thumbnail': r're:https?://.+\.jpg',
'categories': list,
+ 'upload_date': str,
+ 'channel': 'buddha',
+ 'channel_id': '32807',
+ 'uploader': 'Buddha',
+ 'uploader_id': '33057',
+ 'live_status': 'is_live',
+ 'concurrent_view_count': int,
+ 'release_timestamp': int,
+ 'age_limit': 18,
+ 'release_date': str,
},
- 'skip': 'livestream',
+ 'params': {'skip_download': 'livestream'},
+ # 'skip': 'livestream',
}, {
- 'url': 'https://kick.com/kmack710',
+ 'url': 'https://kick.com/xqc',
'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return False if KickClipIE.suitable(url) else super().suitable(url)
+
def _real_extract(self, url):
channel = self._match_id(url)
- response = self._call_api(f'channels/{channel}', channel)
+ response = self._call_api(f'v2/channels/{channel}', channel)
if not traverse_obj(response, 'livestream', expected_type=dict):
raise UserNotLive(video_id=channel)
return {
- 'id': str(traverse_obj(
- response, ('livestream', ('slug', 'id')), get_all=False, default=channel)),
- 'formats': self._extract_m3u8_formats(
- response['playback_url'], channel, 'mp4', live=True),
- 'title': traverse_obj(
- response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
- 'description': traverse_obj(response, ('user', 'bio')),
'channel': channel,
- 'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))),
- 'uploader': traverse_obj(response, 'name', ('user', 'username')),
- 'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))),
'is_live': True,
- 'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))),
- 'thumbnail': traverse_obj(
- response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none),
- 'categories': traverse_obj(response, ('recent_categories', ..., 'name')),
+ 'formats': self._extract_m3u8_formats(response['playback_url'], channel, 'mp4', live=True),
+ **traverse_obj(response, {
+ 'id': ('livestream', 'slug', {str}),
+ 'title': ('livestream', 'session_title', {str}),
+ 'description': ('user', 'bio', {str}),
+ 'channel_id': (('id', ('livestream', 'channel_id')), {int}, {str_or_none}, any),
+ 'uploader': (('name', ('user', 'username')), {str}, any),
+ 'uploader_id': (('user_id', ('user', 'id')), {int}, {str_or_none}, any),
+ 'timestamp': ('livestream', 'created_at', {unified_timestamp}),
+ 'release_timestamp': ('livestream', 'start_time', {unified_timestamp}),
+ 'thumbnail': ('livestream', 'thumbnail', 'url', {url_or_none}),
+ 'categories': ('recent_categories', ..., 'name', {str}),
+ 'concurrent_view_count': ('livestream', 'viewer_count', {int_or_none}),
+ 'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
+ }),
}
class KickVODIE(KickBaseIE):
+ IE_NAME = 'kick:vod'
_VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
_TESTS = [{
- 'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3',
+ 'url': 'https://kick.com/video/e74614f4-5270-4319-90ad-32179f19a45c',
'md5': '3870f94153e40e7121a6e46c068b70cb',
'info_dict': {
- 'id': '58bac65b-e641-4476-a7ba-3707a35e60e3',
+ 'id': 'e74614f4-5270-4319-90ad-32179f19a45c',
'ext': 'mp4',
- 'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠',
- 'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d',
- 'channel': 'jaredfps',
- 'channel_id': '26608',
- 'uploader': 'JaredFPS',
- 'uploader_id': '26799',
- 'upload_date': '20240402',
- 'timestamp': 1712097108,
- 'duration': 33859.0,
+ 'title': r're:❎ MEGA DRAMA ❎ LIVE ❎ CLICK ❎ ULTIMATE SKILLS .+',
+ 'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.',
+ 'channel': 'xqc',
+ 'channel_id': '668',
+ 'uploader': 'xQc',
+ 'uploader_id': '676',
+ 'upload_date': '20240724',
+ 'timestamp': 1721796562,
+ 'duration': 18566.0,
'thumbnail': r're:^https?://.*\.jpg',
- 'categories': ['Call of Duty: Warzone'],
+ 'view_count': int,
+ 'categories': ['VALORANT'],
+ 'age_limit': 0,
},
- 'params': {
- 'skip_download': 'm3u8',
- },
- 'expected_warnings': [r'impersonation'],
+ 'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- response = self._call_api(f'video/{video_id}', video_id)
+ response = self._call_api(f'v1/video/{video_id}', video_id)
return {
'id': video_id,
'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'),
- 'title': traverse_obj(
- response, ('livestream', ('session_title', 'slug')), get_all=False, default=''),
- 'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')),
- 'channel': traverse_obj(response, ('livestream', 'channel', 'slug')),
- 'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))),
- 'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')),
- 'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))),
- 'timestamp': unified_timestamp(response.get('created_at')),
- 'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000),
- 'thumbnail': traverse_obj(
- response, ('livestream', 'thumbnail'), expected_type=url_or_none),
- 'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')),
+ **traverse_obj(response, {
+ 'title': ('livestream', ('session_title', 'slug'), {str}, any),
+ 'description': ('livestream', 'channel', 'user', 'bio', {str}),
+ 'channel': ('livestream', 'channel', 'slug', {str}),
+ 'channel_id': ('livestream', 'channel', 'id', {int}, {str_or_none}),
+ 'uploader': ('livestream', 'channel', 'user', 'username', {str}),
+ 'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
+ 'timestamp': ('created_at', {parse_iso8601}),
+ 'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}),
+ 'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
+ 'categories': ('livestream', 'categories', ..., 'name', {str}),
+ 'view_count': ('views', {int_or_none}),
+ 'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}),
+ }),
+ }
+
+
+class KickClipIE(KickBaseIE):
+ IE_NAME = 'kick:clips'
+ _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/?\?(?:[^#]+&)?clip=(?P<id>clip_[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
+ 'info_dict': {
+ 'id': 'clip_01GYXVB5Y8PWAPWCWMSBCFB05X',
+ 'ext': 'mp4',
+ 'title': 'Maddy detains Abd D:',
+ 'channel': 'mxddy',
+ 'channel_id': '133789',
+ 'uploader': 'AbdCreates',
+ 'uploader_id': '3309077',
+ 'thumbnail': r're:^https?://.*\.jpeg',
+ 'duration': 35,
+ 'timestamp': 1682481453,
+ 'upload_date': '20230426',
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': ['VALORANT'],
+ 'age_limit': 18,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'url': 'https://kick.com/destiny?clip=clip_01H9SKET879NE7N9RJRRDS98J3',
+ 'info_dict': {
+ 'id': 'clip_01H9SKET879NE7N9RJRRDS98J3',
+ 'title': 'W jews',
+ 'ext': 'mp4',
+ 'channel': 'destiny',
+ 'channel_id': '1772249',
+ 'uploader': 'punished_furry',
+ 'uploader_id': '2027722',
+ 'duration': 49.0,
+ 'upload_date': '20230908',
+ 'timestamp': 1694150180,
+ 'thumbnail': 'https://clips.kick.com/clips/j3/clip_01H9SKET879NE7N9RJRRDS98J3/thumbnail.png',
+ 'view_count': int,
+ 'like_count': int,
+ 'categories': ['Just Chatting'],
+ 'age_limit': 0,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }]
+
+ def _real_extract(self, url):
+ clip_id = self._match_id(url)
+ clip = self._call_api(f'v2/clips/{clip_id}/play', clip_id)['clip']
+ clip_url = clip['clip_url']
+
+ if determine_ext(clip_url) == 'm3u8':
+ formats = self._extract_m3u8_formats(clip_url, clip_id, 'mp4')
+ else:
+ formats = [{'url': clip_url}]
+
+ return {
+ 'id': clip_id,
+ 'formats': formats,
+ **traverse_obj(clip, {
+ 'title': ('title', {str}),
+ 'channel': ('channel', 'slug', {str}),
+ 'channel_id': ('channel', 'id', {int}, {str_or_none}),
+ 'uploader': ('creator', 'username', {str}),
+ 'uploader_id': ('creator', 'id', {int}, {str_or_none}),
+ 'thumbnail': ('thumbnail_url', {url_or_none}),
+ 'duration': ('duration', {float_or_none}),
+ 'categories': ('category', 'name', {str}, all),
+ 'timestamp': ('created_at', {parse_iso8601}),
+ 'view_count': ('views', {int_or_none}),
+ 'like_count': ('likes', {int_or_none}),
+ 'age_limit': ('is_mature', {bool}, {lambda x: 18 if x else 0}),
+ }),
}
diff --git a/yt_dlp/extractor/learningonscreen.py b/yt_dlp/extractor/learningonscreen.py
new file mode 100644
index 0000000..dcf8314
--- /dev/null
+++ b/yt_dlp/extractor/learningonscreen.py
@@ -0,0 +1,78 @@
+import functools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ extract_attributes,
+ get_element_by_class,
+ get_element_html_by_id,
+ join_nonempty,
+ parse_duration,
+ unified_timestamp,
+)
+from ..utils.traversal import traverse_obj
+
+
+class LearningOnScreenIE(InfoExtractor):
+ _VALID_URL = r'https?://learningonscreen\.ac\.uk/ondemand/index\.php/prog/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://learningonscreen.ac.uk/ondemand/index.php/prog/005D81B2?bcast=22757013',
+ 'info_dict': {
+ 'id': '005D81B2',
+ 'ext': 'mp4',
+ 'title': 'Planet Earth',
+ 'duration': 3600.0,
+ 'timestamp': 1164567600.0,
+ 'upload_date': '20061126',
+ 'thumbnail': 'https://stream.learningonscreen.ac.uk/trilt-cover-images/005D81B2-Planet-Earth-2006-11-26T190000Z-BBC4.jpg',
+ },
+ }]
+
+ def _real_initialize(self):
+ if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'):
+ self.raise_login_required(
+ 'Use --cookies for authentication. See '
+ ' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
+ 'for how to manually pass cookies', method=None)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ details = traverse_obj(webpage, (
+ {functools.partial(get_element_html_by_id, 'programme-details')}, {
+ 'title': ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html}),
+ 'timestamp': (
+ {functools.partial(get_element_by_class, 'broadcast-date')},
+ {functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}),
+ 'duration': (
+ {functools.partial(get_element_by_class, 'prog-running-time')},
+ {clean_html}, {parse_duration}),
+ }))
+
+ title = details.pop('title', None) or traverse_obj(webpage, (
+ {functools.partial(get_element_html_by_id, 'add-to-existing-playlist')},
+ {extract_attributes}, 'data-record-title', {clean_html}))
+
+ entries = self._parse_html5_media_entries(
+ 'https://stream.learningonscreen.ac.uk', webpage, video_id, m3u8_id='hls', mpd_id='dash',
+ _headers={'Origin': 'https://learningonscreen.ac.uk', 'Referer': 'https://learningonscreen.ac.uk/'})
+ if not entries:
+ raise ExtractorError('No video found')
+
+ if len(entries) > 1:
+ duration = details.pop('duration', None)
+ for idx, entry in enumerate(entries, start=1):
+ entry.update(details)
+ entry['id'] = join_nonempty(video_id, idx)
+ entry['title'] = join_nonempty(title, idx)
+ return self.playlist_result(entries, video_id, title, duration=duration)
+
+ return {
+ **entries[0],
+ **details,
+ 'id': video_id,
+ 'title': title,
+ }
diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py
index bd1a27f..f513420 100644
--- a/yt_dlp/extractor/mediaklikk.py
+++ b/yt_dlp/extractor/mediaklikk.py
@@ -133,7 +133,9 @@ class MediaKlikkIE(InfoExtractor):
r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
player_data['video'] = player_data.pop('token')
- player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
+ player_page = self._download_webpage(
+ 'https://player.mediaklikk.hu/playernew/player.php', video_id,
+ query=player_data, headers={'Referer': url})
player_json = self._search_json(
r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
playlist_url = traverse_obj(
diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py
index 230c218..935bf85 100644
--- a/yt_dlp/extractor/mlb.py
+++ b/yt_dlp/extractor/mlb.py
@@ -290,9 +290,18 @@ class MLBTVIE(InfoExtractor):
'release_date': '20220702',
'release_timestamp': 1656792300,
},
- 'params': {
- 'skip_download': True,
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ # makeup game: has multiple dates, need to avoid games with 'rescheduleDate'
+ 'url': 'https://www.mlb.com/tv/g747039/vd22541c4-5a29-45f7-822b-635ec041cf5e',
+ 'info_dict': {
+ 'id': '747039',
+ 'ext': 'mp4',
+ 'title': '2024-07-29 - Toronto Blue Jays @ Baltimore Orioles',
+ 'release_date': '20240729',
+ 'release_timestamp': 1722280200,
},
+ 'params': {'skip_download': 'm3u8'},
}]
_GRAPHQL_INIT_QUERY = '''\
mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) {
@@ -463,11 +472,14 @@ mutation initPlaybackSession(
def _real_extract(self, url):
video_id = self._match_id(url)
- metadata = traverse_obj(self._download_json(
+ data = self._download_json(
'https://statsapi.mlb.com/api/v1/schedule', video_id, query={
'gamePk': video_id,
'hydrate': 'broadcasts(all),statusFlags',
- }), ('dates', ..., 'games', lambda _, v: str(v['gamePk']) == video_id and v['broadcasts'], any))
+ })
+ metadata = traverse_obj(data, (
+ 'dates', ..., 'games',
+ lambda _, v: str(v['gamePk']) == video_id and not v.get('rescheduleDate'), any))
broadcasts = traverse_obj(metadata, (
'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF'))
diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py
index becf052..a50c510 100644
--- a/yt_dlp/extractor/olympics.py
+++ b/yt_dlp/extractor/olympics.py
@@ -1,9 +1,17 @@
from .common import InfoExtractor
-from ..utils import int_or_none, try_get
+from ..networking.exceptions import HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+ url_or_none,
+)
+from ..utils.traversal import traverse_obj
class OlympicsReplayIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P<id>[^/#&?]+)'
+ _VALID_URL = r'https?://(?:www\.)?olympics\.com/[a-z]{2}/(?:paris-2024/)?(?:replay|videos?|original-series/episode)/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays',
'info_dict': {
@@ -11,26 +19,98 @@ class OlympicsReplayIE(InfoExtractor):
'ext': 'mp4',
'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020',
'upload_date': '20210801',
- 'timestamp': 1627783200,
+ 'timestamp': 1627797600,
'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3',
- 'uploader': 'International Olympic Committee',
+ 'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/nua4o7zwyaznoaejpbk2',
+ 'duration': 7017.0,
},
'params': {
'skip_download': True,
},
}, {
- 'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp',
- 'only_matching': True,
+ 'url': 'https://olympics.com/en/original-series/episode/b-boys-and-b-girls-take-the-spotlight-breaking-life-road-to-paris-2024',
+ 'info_dict': {
+ 'id': '32633650-c5ee-4280-8b94-fb6defb6a9b5',
+ 'ext': 'mp4',
+ 'title': 'B-girl Nicka - Breaking Life, Road to Paris 2024 | Episode 1',
+ 'upload_date': '20240517',
+ 'timestamp': 1715948200,
+ 'description': 'md5:f63d728a41270ec628f6ac33ce471bb1',
+ 'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/a3j96l7j6so3vyfijby1',
+ 'duration': 1321.0,
+ },
+ }, {
+ 'url': 'https://olympics.com/en/paris-2024/videos/men-s-preliminaries-gbr-esp-ned-rsa-hockey-olympic-games-paris-2024',
+ 'info_dict': {
+ 'id': '3d96db23-8eee-4b7c-8ef5-488a0361026c',
+ 'ext': 'mp4',
+ 'title': 'Men\'s Preliminaries GBR-ESP & NED-RSA | Hockey | Olympic Games Paris 2024',
+ 'upload_date': '20240727',
+ 'timestamp': 1722066600,
+ },
+ 'skip': 'Geo-restricted to RU, BR, BT, NP, TM, BD, TL',
+ }, {
+ 'url': 'https://olympics.com/en/paris-2024/videos/dnp-suni-lee-i-have-goals-and-i-have-expectations-for-myself-but-i-also-am-trying-to-give-myself-grace',
+ 'info_dict': {
+ 'id': 'a42f37ab-8a74-41d0-a7d9-af27b7b02a90',
+ 'ext': 'mp4',
+ 'title': 'md5:c7cfbc9918636a98e66400a812e4d407',
+ 'upload_date': '20240729',
+ 'timestamp': 1722288600,
+ },
}]
+ _GEO_BYPASS = False
+
+ def _extract_from_nextjs_data(self, webpage, video_id):
+ data = traverse_obj(self._search_nextjs_data(webpage, video_id, default={}), (
+ 'props', 'pageProps', 'page', 'items',
+ lambda _, v: v['name'] == 'videoPlaylist', 'data', 'currentVideo', {dict}, any))
+ if not data:
+ return None
+
+ geo_countries = traverse_obj(data, ('countries', ..., {str}))
+ if traverse_obj(data, ('geoRestrictedVideo', {bool})):
+ self.raise_geo_restricted(countries=geo_countries)
+
+ is_live = traverse_obj(data, ('streamingStatus', {str})) == 'LIVE'
+ m3u8_url = traverse_obj(data, ('videoUrl', {url_or_none})) or data['streamUrl']
+ tokenized_url = m3u8_url if is_live else self._tokenize_url(m3u8_url, video_id)
+
+ try:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ tokenized_url, video_id, 'mp4', m3u8_id='hls')
+ except ExtractorError as e:
+ if isinstance(e.cause, HTTPError) and 'georestricted' in e.cause.msg:
+ self.raise_geo_restricted(countries=geo_countries)
+ raise
+
+ return {
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ **traverse_obj(data, {
+ 'id': ('videoID', {str}),
+ 'title': ('title', {str}),
+ 'timestamp': ('contentDate', {parse_iso8601}),
+ }),
+ }
+
+ def _tokenize_url(self, url, video_id):
+ return self._download_json(
+ 'https://olympics.com/tokenGenerator', video_id,
+ 'Downloading tokenized m3u8 url', query={'url': url})
def _real_extract(self, url):
video_id = self._match_id(url)
-
webpage = self._download_webpage(url, video_id)
+
+ if info := self._extract_from_nextjs_data(webpage, video_id):
+ return info
+
title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage)
- uuid = self._html_search_meta('episode_uid', webpage)
+ video_uuid = self._html_search_meta('episode_uid', webpage)
m3u8_url = self._html_search_meta('video_url', webpage)
- json_ld = self._search_json_ld(webpage, uuid)
+ json_ld = self._search_json_ld(webpage, video_uuid)
thumbnails_list = json_ld.get('image')
if not thumbnails_list:
thumbnails_list = self._html_search_regex(
@@ -48,12 +128,12 @@ class OlympicsReplayIE(InfoExtractor):
'width': width,
'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)),
})
- m3u8_url = self._download_json(
- f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url')
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls')
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ self._tokenize_url(m3u8_url, video_uuid), video_uuid, 'mp4', m3u8_id='hls')
return {
- 'id': uuid,
+ 'id': video_uuid,
'title': title,
'thumbnails': thumbnails,
'formats': formats,
diff --git a/yt_dlp/extractor/tva.py b/yt_dlp/extractor/tva.py
index e3e1055..d702640 100644
--- a/yt_dlp/extractor/tva.py
+++ b/yt_dlp/extractor/tva.py
@@ -1,60 +1,29 @@
import functools
import re
+from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none
from ..utils.traversal import traverse_obj
class TVAIE(InfoExtractor):
- _VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)'
+ IE_NAME = 'tvaplus'
+ IE_DESC = 'TVA+'
+ _VALID_URL = r'https?://(?:www\.)?tvaplus\.ca/(?:[^/?#]+/)*[\w-]+-(?P<id>\d+)(?:$|[#?])'
_TESTS = [{
- 'url': 'https://videos.tva.ca/details/_5596811470001',
- 'info_dict': {
- 'id': '5596811470001',
- 'ext': 'mp4',
- 'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
- 'uploader_id': '5481942443001',
- 'upload_date': '20171003',
- 'timestamp': 1507064617,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'skip': 'HTTP Error 404: Not Found',
- }, {
- 'url': 'https://video.tva.ca/details/_5596811470001',
- 'only_matching': True,
- }]
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
- 'ie_key': 'BrightcoveNew',
- }
-
-
-class QubIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)'
- _TESTS = [{
- 'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619',
+ 'url': 'https://www.tvaplus.ca/tva/alerte-amber/saison-1/episode-01-1000036619',
'md5': '949490fd0e7aee11d0543777611fbd53',
'info_dict': {
'id': '6084352463001',
'ext': 'mp4',
- 'title': 'Ép 01. Mon dernier jour',
+ 'title': 'Mon dernier jour',
'uploader_id': '5481942443001',
'upload_date': '20190907',
'timestamp': 1567899756,
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
'thumbnail': r're:https://.+\.jpg',
- 'episode': 'Ép 01. Mon dernier jour',
+ 'episode': 'Mon dernier jour',
'episode_number': 1,
'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'],
'duration': 2625.963,
@@ -64,23 +33,36 @@ class QubIE(InfoExtractor):
'channel': 'TVA',
},
}, {
- 'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
- 'only_matching': True,
+ 'url': 'https://www.tvaplus.ca/tva/le-baiser-du-barbu/le-baiser-du-barbu-886644190',
+ 'info_dict': {
+ 'id': '6354448043112',
+ 'ext': 'mp4',
+ 'title': 'Le Baiser du barbu',
+ 'uploader_id': '5481942443001',
+ 'upload_date': '20240606',
+ 'timestamp': 1717694023,
+ 'description': 'md5:025b1219086c1cbf4bc27e4e034e8b57',
+ 'thumbnail': r're:https://.+\.jpg',
+ 'episode': 'Le Baiser du barbu',
+ 'tags': ['fullepisode', 'films'],
+ 'duration': 6053.504,
+ 'series': 'Le Baiser du barbu',
+ 'channel': 'TVA',
+ },
}]
- # reference_id also works with old account_id(5481942443001)
- # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s'
+ _BC_URL_TMPL = 'https://players.brightcove.net/5481942443001/default_default/index.html?videoId={}'
def _real_extract(self, url):
entity_id = self._match_id(url)
webpage = self._download_webpage(url, entity_id)
- entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData']
+ entity = self._search_nextjs_data(webpage, entity_id)['props']['pageProps']['staticEntity']
video_id = entity['videoId']
episode = strip_or_none(entity.get('name'))
return {
'_type': 'url_transparent',
- 'url': f'https://videos.tva.ca/details/_{video_id}',
- 'ie_key': TVAIE.ie_key(),
+ 'url': smuggle_url(self._BC_URL_TMPL.format(video_id), {'geo_countries': ['CA']}),
+ 'ie_key': BrightcoveNewIE.ie_key(),
'id': video_id,
'title': episode,
'episode': episode,
diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py
index 8105db4..c13832c 100644
--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@@ -10,7 +10,7 @@ from ..utils import (
class TVerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'skip': 'videos are only available for 7 days',
'url': 'https://tver.jp/episodes/ep83nf3w4p',
@@ -24,6 +24,20 @@ class TVerIE(InfoExtractor):
},
'add_ie': ['BrightcoveNew'],
}, {
+ 'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/',
+ 'info_dict': {
+ 'id': '6359578055112',
+ 'ext': 'mp4',
+ 'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」',
+ 'timestamp': 1722279928,
+ 'upload_date': '20240729',
+ 'tags': ['20240729', 'japanese', 'japanmedal', 'paris'],
+ 'uploader_id': '4774017240001',
+ 'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg',
+ 'duration': 670.571,
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
'url': 'https://tver.jp/corner/f0103888',
'only_matching': True,
}, {
@@ -47,7 +61,15 @@ class TVerIE(InfoExtractor):
def _real_extract(self, url):
video_id, video_type = self._match_valid_url(url).group('id', 'type')
- if video_type not in {'series', 'episodes'}:
+
+ if video_type == 'olympic/paris2024/video':
+ # Player ID is taken from .content.brightcove.E200.pro.pc.account_id:
+ # https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d=
+ return self.url_result(smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id),
+ {'geo_countries': ['JP']}), 'BrightcoveNew')
+
+ elif video_type not in {'series', 'episodes'}:
webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
video_id = self._match_id(self._search_regex(
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py
index 1e2d118..8b7ec1d 100644
--- a/yt_dlp/extractor/unsupported.py
+++ b/yt_dlp/extractor/unsupported.py
@@ -49,6 +49,7 @@ class KnownDRMIE(UnsupportedInfoExtractor):
r'amazon\.(?:\w{2}\.)?\w+/gp/video',
r'music\.amazon\.(?:\w{2}\.)?\w+',
r'(?:watch|front)\.njpwworld\.com',
+ r'qub\.ca/vrai',
)
_TESTS = [{
@@ -149,6 +150,9 @@ class KnownDRMIE(UnsupportedInfoExtractor):
}, {
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
'only_matching': True,
+ }, {
+ 'url': 'https://www.qub.ca/vrai/l-effet-bocuse-d-or/saison-1/l-effet-bocuse-d-or-saison-1-bande-annonce-1098225063',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index d10689c..a20cf4b 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -1267,7 +1267,7 @@ class VimeoGroupsIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE
class VimeoReviewIE(VimeoBaseInfoExtractor):
IE_NAME = 'vimeo:review'
IE_DESC = 'Review pages on vimeo'
- _VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
+ _VALID_URL = r'https?://vimeo\.com/(?P<user>[^/?#]+)/review/(?P<id>\d+)/(?P<hash>[\da-f]{10})'
_TESTS = [{
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -1313,26 +1313,22 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
}]
def _real_extract(self, url):
- page_url, video_id = self._match_valid_url(url).groups()
- data = self._download_json(
- page_url.replace('/review/', '/review/data/'), video_id)
+ user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash')
+ data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}'
+ data = self._download_json(data_url, video_id)
if data.get('isLocked') is True:
video_password = self._get_video_password()
viewer = self._download_json(
'https://vimeo.com/_rv/viewer', video_id)
- webpage = self._verify_video_password(video_id, video_password, viewer['xsrft'])
- clip_page_config = self._parse_json(self._search_regex(
- r'window\.vimeo\.clip_page_config\s*=\s*({.+?});',
- webpage, 'clip page config'), video_id)
- config_url = clip_page_config['player']['config_url']
- clip_data = clip_page_config.get('clip') or {}
- else:
- clip_data = data['clipData']
- config_url = clip_data['configUrl']
+ self._verify_video_password(video_id, video_password, viewer['xsrft'])
+ data = self._download_json(data_url, video_id)
+ clip_data = data['clipData']
+ config_url = clip_data['configUrl']
config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id)
source_format = self._extract_original_format(
- page_url + '/action', video_id)
+ f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action', video_id,
+ unlisted_hash=traverse_obj(config_url, ({parse_qs}, 'h', -1)))
if source_format:
info_dict['formats'].append(source_format)
info_dict['description'] = clean_html(clip_data.get('description'))
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 7364e8a..88e1a28 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -72,133 +72,169 @@ STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
'web': {
- 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB',
- 'clientVersion': '2.20220801.00.00',
+ 'clientVersion': '2.20240726.00.00',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
+ },
+ # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
+ 'web_safari': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'WEB',
+ 'clientVersion': '2.20240726.00.00',
+ 'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
},
'web_embedded': {
- 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_EMBEDDED_PLAYER',
- 'clientVersion': '1.20220731.00.00',
+ 'clientVersion': '1.20240723.01.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
},
'web_music': {
- 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
'INNERTUBE_HOST': 'music.youtube.com',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_REMIX',
- 'clientVersion': '1.20220727.01.00',
+ 'clientVersion': '1.20240724.00.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
},
'web_creator': {
- 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB_CREATOR',
- 'clientVersion': '1.20220726.00.00',
+ 'clientVersion': '1.20240723.03.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
},
'android': {
- 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID',
- 'clientVersion': '19.09.37',
+ 'clientVersion': '19.29.37',
'androidSdkVersion': 30,
- 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
+ 'userAgent': 'com.google.android.youtube/19.29.37 (Linux; U; Android 11) gzip',
+ 'osName': 'Android',
+ 'osVersion': '11',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
'REQUIRE_JS_PLAYER': False,
},
- 'android_embedded': {
- 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
+ 'android_music': {
'INNERTUBE_CONTEXT': {
'client': {
- 'clientName': 'ANDROID_EMBEDDED_PLAYER',
- 'clientVersion': '19.09.37',
+ 'clientName': 'ANDROID_MUSIC',
+ 'clientVersion': '7.11.50',
'androidSdkVersion': 30,
- 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
+ 'userAgent': 'com.google.android.apps.youtube.music/7.11.50 (Linux; U; Android 11) gzip',
+ 'osName': 'Android',
+ 'osVersion': '11',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
'REQUIRE_JS_PLAYER': False,
},
- 'android_music': {
- 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
+ 'android_creator': {
'INNERTUBE_CONTEXT': {
'client': {
- 'clientName': 'ANDROID_MUSIC',
- 'clientVersion': '6.42.52',
+ 'clientName': 'ANDROID_CREATOR',
+ 'clientVersion': '24.30.100',
'androidSdkVersion': 30,
- 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip',
+ 'userAgent': 'com.google.android.apps.youtube.creator/24.30.100 (Linux; U; Android 11) gzip',
+ 'osName': 'Android',
+ 'osVersion': '11',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
'REQUIRE_JS_PLAYER': False,
},
- 'android_creator': {
- 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
+ # YouTube Kids videos aren't returned on this client for some reason
+ 'android_vr': {
'INNERTUBE_CONTEXT': {
'client': {
- 'clientName': 'ANDROID_CREATOR',
- 'clientVersion': '22.30.100',
+ 'clientName': 'ANDROID_VR',
+ 'clientVersion': '1.57.29',
+ 'deviceMake': 'Oculus',
+ 'deviceModel': 'Quest 3',
+ 'androidSdkVersion': 32,
+ 'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.57.29 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
+ 'osName': 'Android',
+ 'osVersion': '12L',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
+ 'REQUIRE_JS_PLAYER': False,
+ },
+ 'android_testsuite': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID_TESTSUITE',
+ 'clientVersion': '1.9',
'androidSdkVersion': 30,
- 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
+ 'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 11) gzip',
+ 'osName': 'Android',
+ 'osVersion': '11',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 30,
'REQUIRE_JS_PLAYER': False,
+ 'PLAYER_PARAMS': '2AMB',
},
- # iOS clients have HLS live streams. Setting device model to get 60fps formats.
- # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
- 'ios': {
- 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
+ # This client only has legacy formats and storyboards
+ 'android_producer': {
'INNERTUBE_CONTEXT': {
'client': {
- 'clientName': 'IOS',
- 'clientVersion': '19.09.3',
- 'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
+ 'clientName': 'ANDROID_PRODUCER',
+ 'clientVersion': '0.111.1',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip',
+ 'osName': 'Android',
+ 'osVersion': '11',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 91,
'REQUIRE_JS_PLAYER': False,
},
- 'ios_embedded': {
+ # iOS clients have HLS live streams. Setting device model to get 60fps formats.
+ # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
+ 'ios': {
'INNERTUBE_CONTEXT': {
'client': {
- 'clientName': 'IOS_MESSAGES_EXTENSION',
- 'clientVersion': '19.09.3',
- 'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
+ 'clientName': 'IOS',
+ 'clientVersion': '19.29.1',
+ 'deviceMake': 'Apple',
+ 'deviceModel': 'iPhone16,2',
+ 'userAgent': 'com.google.ios.youtube/19.29.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
+ 'osName': 'iPhone',
+ 'osVersion': '17.5.1.21F90',
},
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
'REQUIRE_JS_PLAYER': False,
},
'ios_music': {
- 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MUSIC',
- 'clientVersion': '6.33.3',
- 'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
+ 'clientVersion': '7.08.2',
+ 'deviceMake': 'Apple',
+ 'deviceModel': 'iPhone16,2',
+ 'userAgent': 'com.google.ios.youtubemusic/7.08.2 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
+ 'osName': 'iPhone',
+ 'osVersion': '17.5.1.21F90',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
@@ -208,9 +244,12 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_CREATOR',
- 'clientVersion': '22.33.101',
- 'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
+ 'clientVersion': '24.30.100',
+ 'deviceMake': 'Apple',
+ 'deviceModel': 'iPhone16,2',
+ 'userAgent': 'com.google.ios.ytcreator/24.30.100 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
+ 'osName': 'iPhone',
+ 'osVersion': '17.5.1.21F90',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
@@ -219,19 +258,26 @@ INNERTUBE_CLIENTS = {
# mweb has 'ultralow' formats
# See: https://github.com/yt-dlp/yt-dlp/pull/557
'mweb': {
- 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'MWEB',
- 'clientVersion': '2.20220801.00.00',
+ 'clientVersion': '2.20240726.01.00',
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
},
+ 'tv': {
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'TVHTML5',
+ 'clientVersion': '7.20240724.13.00',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
+ },
# This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
# See: https://github.com/zerodytrash/YouTube-Internal-Clients
'tv_embedded': {
- 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
@@ -249,6 +295,7 @@ INNERTUBE_CLIENTS = {
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
+ 'REQUIRE_JS_PLAYER': False,
},
}
@@ -262,7 +309,7 @@ def _split_innertube_client(client_name):
def short_client_name(client_name):
- main, *parts = _split_innertube_client(client_name)[0].replace('embedscreen', 'e_s').split('_')
+ main, *parts = _split_innertube_client(client_name)[0].split('_')
return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper()
@@ -274,23 +321,18 @@ def build_innertube_clients():
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
- ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
+ ytcfg.setdefault('PLAYER_PARAMS', None)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
_, base_client, variant = _split_innertube_client(client)
ytcfg['priority'] = 10 * priority(base_client)
- if not variant:
- INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
- embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
- embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
- embedscreen['priority'] -= 3
- elif variant == 'embedded':
+ if variant == 'embedded':
ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
ytcfg['priority'] -= 2
- else:
+ elif variant:
ytcfg['priority'] -= 3
@@ -566,9 +608,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
or req_api_hostname or self._get_innertube_host(default_client or 'web'))
- def _extract_api_key(self, ytcfg=None, default_client='web'):
- return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
-
def _extract_context(self, ytcfg=None, default_client='web'):
context = get_first(
(ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
@@ -614,13 +653,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
real_headers.update({'content-type': 'application/json'})
if headers:
real_headers.update(headers)
- api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
- or api_key or self._extract_api_key(default_client=default_client))
return self._download_json(
f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
data=json.dumps(data).encode('utf8'), headers=real_headers,
- query={'key': api_key, 'prettyPrint': 'false'})
+ query=filter_dict({
+ 'key': self._configuration_arg(
+ 'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0],
+ 'prettyPrint': 'false',
+ }, cndn=lambda _, v: v))
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
@@ -972,7 +1013,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
ep=ep, fatal=True, headers=headers,
video_id=item_id, query=query, note=note,
context=self._extract_context(ytcfg, default_client),
- api_key=self._extract_api_key(ytcfg, default_client),
api_hostname=api_hostname, default_client=default_client)
except ExtractorError as e:
if not isinstance(e.cause, network_exceptions):
@@ -1295,6 +1335,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
_POTOKEN_EXPERIMENTS = ('51217476', '51217102')
+ _BROKEN_CLIENTS = {
+ short_client_name(client): client
+ for client in ('android', 'android_creator', 'android_music')
+ }
_GEO_BYPASS = False
@@ -3129,19 +3173,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.write_debug(f'Decrypted nsig {s} => {ret}')
return ret
- def _extract_n_function_name(self, jscode):
+ def _extract_n_function_name(self, jscode, player_url=None):
+ # Examples (with placeholders nfunc, narray, idx):
+ # * .get("n"))&&(b=nfunc(b)
+ # * .get("n"))&&(b=narray[idx](b)
+ # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
+ # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
+ # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
funcname, idx = self._search_regex(
r'''(?x)
(?:
\.get\("n"\)\)&&\(b=|
(?:
b=String\.fromCharCode\(110\)|
- ([a-zA-Z0-9$.]+)&&\(b="nn"\[\+\1\]
- ),c=a\.get\(b\)\)&&\(c=
- )
- (?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''',
- jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
- if not idx:
+ (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
+ ),c=a\.get\(b\)\)&&\(c=|
+ \b(?P<var>[a-zA-Z0-9_$]+)=
+ )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
+ (?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''',
+ jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
+ if not funcname:
+ self.report_warning(join_nonempty(
+ 'Falling back to generic n function search',
+ player_url and f' player = {player_url}', delim='\n'))
+ return self._search_regex(
+ r'''(?xs)
+ ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
+ \s*\{(?:(?!};).)+?["']enhanced_except_''',
+ jscode, 'Initial JS player n function name', group='name')
+ elif not idx:
return funcname
return json.loads(js_to_json(self._search_regex(
@@ -3157,7 +3217,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if func_code:
return jsi, player_id, func_code
- func_name = self._extract_n_function_name(jscode)
+ func_name = self._extract_n_function_name(jscode, player_url=player_url)
func_code = jsi.extract_function_code(func_name)
@@ -3661,9 +3721,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'videoId': video_id,
}
- pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
- if pp_arg:
- yt_query['params'] = pp_arg
+ default_pp = traverse_obj(
+ INNERTUBE_CLIENTS, (_split_innertube_client(client)[0], 'PLAYER_PARAMS', {str}))
+ if player_params := self._configuration_arg('player_params', [default_pp], casesense=True)[0]:
+ yt_query['params'] = player_params
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
@@ -3675,8 +3736,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_requested_clients(self, url, smuggled_data):
requested_clients = []
- android_clients = []
- default = ['ios', 'web']
+ broken_clients = []
+ default = ['ios', 'tv']
allowed_clients = sorted(
(client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
@@ -3687,18 +3748,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
requested_clients.extend(allowed_clients)
elif client not in allowed_clients:
self.report_warning(f'Skipping unsupported client {client}')
- elif client.startswith('android'):
- android_clients.append(client)
+ elif client in self._BROKEN_CLIENTS.values():
+ broken_clients.append(client)
else:
requested_clients.append(client)
- # Force deprioritization of broken Android clients for format de-duplication
- requested_clients.extend(android_clients)
+ # Force deprioritization of _BROKEN_CLIENTS for format de-duplication
+ requested_clients.extend(broken_clients)
if not requested_clients:
requested_clients = default
if smuggled_data.get('is_music_url') or self.is_music_url(url):
- requested_clients.extend(
- f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
+ for requested_client in requested_clients:
+ _, base_client, variant = _split_innertube_client(requested_client)
+ music_client = f'{base_client}_music'
+ if variant != 'music' and music_client in INNERTUBE_CLIENTS:
+ requested_clients.append(music_client)
return orderedSet(requested_clients)
@@ -3792,14 +3856,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f[STREAMING_DATA_CLIENT_NAME] = name
prs.append(pr)
- # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
- if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
- append_client(f'{base_client}_creator')
- elif self._is_agegated(pr):
- if variant == 'tv_embedded':
- append_client(f'{base_client}_embedded')
- elif not variant:
- append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
+ # tv_embedded can work around age-gate and age-verification IF the video is embeddable
+ if self._is_agegated(pr) and variant != 'tv_embedded':
+ append_client(f'tv_embedded.{base_client}')
+
+ # Unauthenticated users will only get tv_embedded client formats if age-gated
+ if self._is_agegated(pr) and not self.is_authenticated:
+ self.to_screen(
+ f'{video_id}: This video is age-restricted; some formats may be missing '
+ f'without authentication. {self._login_hint()}', only_once=True)
+
+ # EU countries require age-verification for accounts to access age-restricted videos
+ # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
+ # If embedding is disabled for the video, _is_unplayable() will be truthy for tv_embedded
+ embedding_is_disabled = variant == 'tv_embedded' and self._is_unplayable(pr)
+ if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled):
+ self.to_screen(
+ f'{video_id}: This video is age-restricted and YouTube is requiring '
+ 'account age-verification; some formats may be missing', only_once=True)
+ # web_creator and mediaconnect can work around the age-verification requirement
+ # _producer, _testsuite, & _vr variants can also work around age-verification
+ append_client('web_creator', 'mediaconnect')
if skipped_clients:
self.report_warning(
@@ -3935,13 +4012,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
- # Android client formats are broken due to integrity check enforcement
+ # _BROKEN_CLIENTS return videoplayback URLs that expire after 30 seconds
# Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
- is_broken = client_name and client_name.startswith(short_client_name('android'))
+ is_broken = client_name in self._BROKEN_CLIENTS
if is_broken:
self.report_warning(
- f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
- 'They will be deprioritized', only_once=True)
+ f'{video_id}: {self._BROKEN_CLIENTS[client_name]} client formats are broken '
+ 'and may yield HTTP Error 403. They will be deprioritized', only_once=True)
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
fps = int_or_none(fmt.get('fps')) or 0
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 3e3b285..0d3e707 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -1217,7 +1217,7 @@ def unified_timestamp(date_str, day_first=True):
return None
date_str = re.sub(r'\s+', ' ', re.sub(
- r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
+ r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str))
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
timezone, date_str = extract_timezone(date_str)
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index e641bf5..81d1c2c 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
-__version__ = '2024.07.25'
+__version__ = '2024.08.01'
-RELEASE_GIT_HEAD = 'f0993391e6052ec8f7aacc286609564f226943b9'
+RELEASE_GIT_HEAD = 'ffd7781d6588926f820b44a34b9e6e3068fb9f97'
VARIANT = None
@@ -12,4 +12,4 @@ CHANNEL = 'stable'
ORIGIN = 'yt-dlp/yt-dlp'
-_pkg_version = '2024.07.25'
+_pkg_version = '2024.08.01'