diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-05 09:07:51 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-05 09:07:51 +0000 |
commit | 3278ab0765a50bc8a4716ce5c0b3aa7015a3e3d5 (patch) | |
tree | 65228f3cad4dcb6dcf7138ebdc80329c966010d3 /yt_dlp | |
parent | Releasing progress-linux version 2024.07.25-1~progress7.99u1. (diff) | |
download | yt-dlp-3278ab0765a50bc8a4716ce5c0b3aa7015a3e3d5.tar.xz yt-dlp-3278ab0765a50bc8a4716ce5c0b3aa7015a3e3d5.zip |
Merging upstream version 2024.08.01.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | yt_dlp/extractor/_extractors.py | 7 | ||||
-rw-r--r-- | yt_dlp/extractor/abematv.py | 3 | ||||
-rw-r--r-- | yt_dlp/extractor/cbc.py | 269 | ||||
-rw-r--r-- | yt_dlp/extractor/common.py | 8 | ||||
-rw-r--r-- | yt_dlp/extractor/dplay.py | 5 | ||||
-rw-r--r-- | yt_dlp/extractor/kick.py | 205 | ||||
-rw-r--r-- | yt_dlp/extractor/learningonscreen.py | 78 | ||||
-rw-r--r-- | yt_dlp/extractor/mediaklikk.py | 4 | ||||
-rw-r--r-- | yt_dlp/extractor/mlb.py | 20 | ||||
-rw-r--r-- | yt_dlp/extractor/olympics.py | 106 | ||||
-rw-r--r-- | yt_dlp/extractor/tva.py | 72 | ||||
-rw-r--r-- | yt_dlp/extractor/tver.py | 26 | ||||
-rw-r--r-- | yt_dlp/extractor/unsupported.py | 4 | ||||
-rw-r--r-- | yt_dlp/extractor/vimeo.py | 24 | ||||
-rw-r--r-- | yt_dlp/extractor/youtube.py | 279 | ||||
-rw-r--r-- | yt_dlp/utils/_utils.py | 2 | ||||
-rw-r--r-- | yt_dlp/version.py | 6 |
17 files changed, 813 insertions, 305 deletions
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index d2140bc..9b73fcd 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -939,6 +939,7 @@ from .khanacademy import ( KhanAcademyUnitIE, ) from .kick import ( + KickClipIE, KickIE, KickVODIE, ) @@ -986,6 +987,7 @@ from .lcp import ( LcpIE, LcpPlayIE, ) +from .learningonscreen import LearningOnScreenIE from .lecture2go import Lecture2GoIE from .lecturio import ( LecturioCourseIE, @@ -2169,10 +2171,7 @@ from .tv5unis import ( TV5UnisVideoIE, ) from .tv24ua import TV24UAVideoIE -from .tva import ( - TVAIE, - QubIE, -) +from .tva import TVAIE from .tvanouvelles import ( TVANouvellesArticleIE, TVANouvellesIE, diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index 2611c6f..66ab083 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -377,8 +377,7 @@ class AbemaTVIE(AbemaTVBaseIE): f'https://api.abema.io/v1/video/programs/{video_id}', video_id, note='Checking playability', headers=headers) - ondemand_types = traverse_obj(api_response, ('terms', ..., 'onDemandType')) - if 3 not in ondemand_types: + if not traverse_obj(api_response, ('label', 'free', {bool})): # cannot acquire decryption key for these streams self.report_warning('This is a premium-only stream') availability = 'premium_only' diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 1522b08..373c9d2 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -1,4 +1,5 @@ import base64 +import functools import json import re import time @@ -6,17 +7,24 @@ import urllib.parse import xml.etree.ElementTree from .common import InfoExtractor +from ..networking import HEADRequest from ..utils import ( ExtractorError, + float_or_none, int_or_none, join_nonempty, js_to_json, + mimetype2ext, orderedSet, parse_iso8601, + replace_extension, smuggle_url, strip_or_none, traverse_obj, try_get, + update_url, + url_basename, + url_or_none, ) @@ -149,6 +157,7 @@ class CBCIE(InfoExtractor): class CBCPlayerIE(InfoExtractor): IE_NAME = 'cbc.ca:player' _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)' + _GEO_COUNTRIES = ['CA'] _TESTS = [{ 'url': 'http://www.cbc.ca/player/play/2683190193', 'md5': '64d25f841ddf4ddb28a235338af32e2c', @@ -172,21 +181,20 @@ class CBCPlayerIE(InfoExtractor): 'description': 'md5:dd3b692f0a139b0369943150bd1c46a9', 'timestamp': 1425704400, 'upload_date': '20150307', - 'uploader': 'CBCC-NEW', - 'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg', + 'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg', 'chapters': [], 'duration': 494.811, - 'categories': ['AudioMobile/All in a Weekend Montreal'], - 'tags': 'count:8', + 'categories': ['All in a Weekend Montreal'], + 'tags': 'count:11', 'location': 'Quebec', 'series': 'All in a Weekend Montreal', 'season': 'Season 2015', 'season_number': 2015, 'media_type': 'Excerpt', + 'genres': ['Other'], }, }, { 'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062', - 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6', 'info_dict': { 'id': '2164402062', 'ext': 'mp4', @@ -194,107 +202,168 @@ class CBCPlayerIE(InfoExtractor): 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.', 'timestamp': 1320410746, 'upload_date': '20111104', - 'uploader': 'CBCC-NEW', - 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg', + 'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg', 'chapters': [], 'duration': 186.867, 'series': 'CBC News: Windsor at 6:00', - 'categories': ['News/Canada/Windsor'], + 'categories': ['Windsor'], 'location': 'Windsor', - 'tags': ['cancer'], - 'creators': ['Allison Johnson'], + 'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'], 'media_type': 'Excerpt', + 'genres': ['News'], }, + 'params': {'skip_download': 'm3u8'}, }, { # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ 'url': 'https://www.cbc.ca/player/play/1.2985700', 'md5': 'e5e708c34ae6fca156aafe17c43e8b75', 'info_dict': { - 'id': '2657631896', + 'id': '1.2985700', 'ext': 'mp3', 'title': 'CBC Montreal is organizing its first ever community hackathon!', 'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.', 'timestamp': 1425704400, 'upload_date': '20150307', - 'uploader': 'CBCC-NEW', - 'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg', + 'thumbnail': 'https://i.cbc.ca/ais/1.2985700,1717262248558/full/max/0/default.jpg', 'chapters': [], 'duration': 494.811, - 'categories': ['AudioMobile/All in a Weekend Montreal'], - 'tags': 'count:8', + 'categories': ['All in a Weekend Montreal'], + 'tags': 'count:11', 'location': 'Quebec', 'series': 'All in a Weekend Montreal', 'season': 'Season 2015', 'season_number': 2015, 'media_type': 'Excerpt', + 'genres': ['Other'], }, }, { 'url': 'https://www.cbc.ca/player/play/1.1711287', - 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6', 'info_dict': { - 'id': '2164402062', + 'id': '1.1711287', 'ext': 'mp4', 'title': 'Cancer survivor four times over', 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.', 'timestamp': 1320410746, 'upload_date': '20111104', - 'uploader': 'CBCC-NEW', - 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg', + 'thumbnail': 'https://i.cbc.ca/ais/1.1711287,1717139372111/full/max/0/default.jpg', 'chapters': [], 'duration': 186.867, 'series': 'CBC News: Windsor at 6:00', - 'categories': ['News/Canada/Windsor'], + 'categories': ['Windsor'], 'location': 'Windsor', - 'tags': ['cancer'], - 'creators': ['Allison Johnson'], + 'tags': ['Cancer', 'News/Canada/Windsor', 'Windsor'], 'media_type': 'Excerpt', + 'genres': ['News'], }, + 'params': {'skip_download': 'm3u8'}, }, { # Has subtitles # These broadcasts expire after ~1 month, can find new test URL here: # https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast - 'url': 'https://www.cbc.ca/player/play/1.7159484', - 'md5': '6ed6cd0fc2ef568d2297ba68a763d455', + 'url': 'https://www.cbc.ca/player/play/video/9.6424403', + 'md5': '8025909eaffcf0adf59922904def9a5e', 'info_dict': { - 'id': '2324213316001', + 'id': '9.6424403', 'ext': 'mp4', - 'title': 'The National | School boards sue social media giants', - 'description': 'md5:4b4db69322fa32186c3ce426da07402c', - 'timestamp': 1711681200, - 'duration': 2743.400, - 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]}, - 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg', - 'uploader': 'CBCC-NEW', + 'title': 'The National | N.W.T. wildfire emergency', + 'description': 'md5:ada33d36d1df69347ed575905bfd496c', + 'timestamp': 1718589600, + 'duration': 2692.833, + 'subtitles': { + 'en-US': [{ + 'name': 'English Captions', + 'url': 'https://cbchls.akamaized.net/delivery/news-shows/2024/06/17/NAT_JUN16-00-55-00/NAT_JUN16_cc.vtt', + }], + }, + 'thumbnail': 'https://i.cbc.ca/ais/6272b5c6-5e78-4c05-915d-0e36672e33d1,1714756287822/full/max/0/default.jpg', 'chapters': 'count:5', - 'upload_date': '20240329', - 'categories': 'count:4', + 'upload_date': '20240617', + 'categories': ['News', 'The National', 'The National Latest Broadcasts'], 'series': 'The National - Full Show', - 'tags': 'count:1', - 'creators': ['News'], + 'tags': ['The National'], 'location': 'Canada', 'media_type': 'Full Program', + 'genres': ['News'], }, }, { 'url': 'https://www.cbc.ca/player/play/video/1.7194274', 'md5': '188b96cf6bdcb2540e178a6caa957128', 'info_dict': { - 'id': '2334524995812', + 'id': '1.7194274', 'ext': 'mp4', 'title': '#TheMoment a rare white spirit moose was spotted in Alberta', 'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3', 'timestamp': 1714788791, 'duration': 77.678, 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]}, - 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg', - 'uploader': 'CBCC-NEW', - 'chapters': 'count:0', - 'upload_date': '20240504', + 'thumbnail': 'https://i.cbc.ca/ais/1.7194274,1717224990425/full/max/0/default.jpg', + 'chapters': [], 'categories': 'count:3', 'series': 'The National', - 'tags': 'count:15', - 'creators': ['encoder'], + 'tags': 'count:17', + 'location': 'Canada', + 'media_type': 'Excerpt', + 'upload_date': '20240504', + 'genres': ['News'], + }, + }, { + 'url': 'https://www.cbc.ca/player/play/video/9.6427282', + 'info_dict': { + 'id': '9.6427282', + 'ext': 'mp4', + 'title': 'Men\'s Soccer - Argentina vs Morocco', + 'description': 'Argentina faces Morocco on the football pitch at Saint Etienne Stadium.', + 'series': 'CBC Sports', + 'media_type': 'Event Coverage', + 'thumbnail': 'https://i.cbc.ca/ais/a4c5c0c2-99fa-4bd3-8061-5a63879c1b33,1718828053500/full/max/0/default.jpg', + 'timestamp': 1721825400.0, + 'upload_date': '20240724', + 'duration': 10568.0, + 'chapters': [], + 'genres': [], + 'tags': ['2024 Paris Olympic Games'], + 'categories': ['Olympics Summer Soccer', 'Summer Olympics Replays', 'Summer Olympics Soccer Replays'], 'location': 'Canada', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.cbc.ca/player/play/video/9.6459530', + 'md5': '6c1bb76693ab321a2e99c347a1d5ecbc', + 'info_dict': { + 'id': '9.6459530', + 'ext': 'mp4', + 'title': 'Parts of Jasper incinerated as wildfire rages', + 'description': 'md5:6f1caa8d128ad3f629257ef5fecf0962', + 'series': 'The National', 'media_type': 'Excerpt', + 'thumbnail': 'https://i.cbc.ca/ais/507c0086-31a2-494d-96e4-bffb1048d045,1721953984375/full/max/0/default.jpg', + 'timestamp': 1721964091.012, + 'upload_date': '20240726', + 'duration': 952.285, + 'chapters': [], + 'genres': [], + 'tags': 'count:23', + 'categories': ['News (FAST)', 'News', 'The National', 'TV News Shows', 'The National '], + }, + }, { + 'url': 'https://www.cbc.ca/player/play/video/9.6420651', + 'md5': '71a850c2c6ee5e912de169f5311bb533', + 'info_dict': { + 'id': '9.6420651', + 'ext': 'mp4', + 'title': 'Is it a breath of fresh air? Measuring air quality in Edmonton', + 'description': 'md5:3922b92cc8b69212d739bd9dd095b1c3', + 'series': 'CBC News Edmonton', + 'media_type': 'Excerpt', + 'thumbnail': 'https://i.cbc.ca/ais/73c4ab9c-7ad4-46ee-bb9b-020fdc01c745,1718214547576/full/max/0/default.jpg', + 'timestamp': 1718220065.768, + 'upload_date': '20240612', + 'duration': 286.086, + 'chapters': [], + 'genres': ['News'], + 'categories': ['News', 'Edmonton'], + 'tags': 'count:7', + 'location': 'Edmonton', }, }, { 'url': 'cbcplayer:1.7159484', @@ -307,23 +376,113 @@ class CBCPlayerIE(InfoExtractor): 'only_matching': True, }] + def _parse_param(self, asset_data, name): + return traverse_obj(asset_data, ('params', lambda _, v: v['name'] == name, 'value', {str}, any)) + def _real_extract(self, url): video_id = self._match_id(url) - if '.' in video_id: - webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id) - video_id = self._search_json( - r'window\.__INITIAL_STATE__\s*=', webpage, - 'initial state', video_id)['video']['currentClip']['mediaId'] + webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id) + data = self._search_json( + r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)['video']['currentClip'] + assets = traverse_obj( + data, ('media', 'assets', lambda _, v: url_or_none(v['key']) and v['type'])) + + if not assets and (media_id := traverse_obj(data, ('mediaId', {str}))): + # XXX: Deprecated; CBC is migrating off of ThePlatform + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{media_id}?mbr=true&formats=MPEG4,FLV,MP3', { + 'force_smil_url': True, + }), + 'id': media_id, + '_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS + } + + is_live = traverse_obj(data, ('media', 'streamType', {str})) == 'Live' + formats, subtitles = [], {} + + for sub in traverse_obj(data, ('media', 'textTracks', lambda _, v: url_or_none(v['src']))): + subtitles.setdefault(sub.get('language') or 'und', []).append({ + 'url': sub['src'], + 'name': sub.get('label'), + }) + + for asset in assets: + asset_key = asset['key'] + asset_type = asset['type'] + if asset_type != 'medianet': + self.report_warning(f'Skipping unsupported asset type "{asset_type}": {asset_key}') + continue + asset_data = self._download_json(asset_key, video_id, f'Downloading {asset_type} JSON') + ext = mimetype2ext(self._parse_param(asset_data, 'contentType')) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + asset_data['url'], video_id, 'mp4', m3u8_id='hls', live=is_live) + formats.extend(fmts) + # Avoid slow/error-prone webvtt-over-m3u8 if direct https vtt is available + if not subtitles: + self._merge_subtitles(subs, target=subtitles) + if is_live or not fmts: + continue + # Check for direct https mp4 format + best_video_fmt = traverse_obj(fmts, ( + lambda _, v: v.get('vcodec') != 'none' and v['tbr'], all, + {functools.partial(sorted, key=lambda x: x['tbr'])}, -1, {dict})) or {} + base_url = self._search_regex( + r'(https?://[^?#]+?/)hdntl=', best_video_fmt.get('url'), 'base url', default=None) + if not base_url or '/live/' in base_url: + continue + mp4_url = base_url + replace_extension(url_basename(best_video_fmt['url']), 'mp4') + if self._request_webpage( + HEADRequest(mp4_url), video_id, 'Checking for https format', + errnote=False, fatal=False): + formats.append({ + **best_video_fmt, + 'url': mp4_url, + 'format_id': 'https-mp4', + 'protocol': 'https', + 'manifest_url': None, + 'acodec': None, + }) + else: + formats.append({ + 'url': asset_data['url'], + 'ext': ext, + 'vcodec': 'none' if self._parse_param(asset_data, 'mediaType') == 'audio' else None, + }) + + chapters = traverse_obj(data, ( + 'media', 'chapters', lambda _, v: float(v['startTime']) is not None, { + 'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}), + 'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}), + 'title': ('name', {str}), + })) + # Filter out pointless single chapters with start_time==0 and no end_time + if len(chapters) == 1 and not (chapters[0].get('start_time') or chapters[0].get('end_time')): + chapters = [] return { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': smuggle_url( - f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{video_id}?mbr=true&formats=MPEG4,FLV,MP3', { - 'force_smil_url': True, - }), + **traverse_obj(data, { + 'title': ('title', {str}), + 'description': ('description', {str.strip}), + 'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}), + 'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}), + 'media_type': ('media', 'clipType', {str}), + 'series': ('showName', {str}), + 'season_number': ('media', 'season', {int_or_none}), + 'duration': ('media', 'duration', {float_or_none}, {lambda x: None if is_live else x}), + 'location': ('media', 'region', {str}), + 'tags': ('tags', ..., 'name', {str}), + 'genres': ('media', 'genre', all), + 'categories': ('categories', ..., 'name', {str}), + }), 'id': video_id, - '_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS + 'formats': formats, + 'subtitles': subtitles, + 'chapters': chapters, + 'is_live': is_live, } diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index f63bd78..187f73e 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3150,7 +3150,7 @@ class InfoExtractor: }) return formats, subtitles - def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None): + def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8_native', mpd_id=None, preference=None, quality=None, _headers=None): def absolute_url(item_url): return urljoin(base_url, item_url) @@ -3174,11 +3174,11 @@ class InfoExtractor: formats = self._extract_m3u8_formats( full_url, video_id, ext='mp4', entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id, - preference=preference, quality=quality, fatal=False) + preference=preference, quality=quality, fatal=False, headers=_headers) elif ext == 'mpd': is_plain_url = False formats = self._extract_mpd_formats( - full_url, video_id, mpd_id=mpd_id, fatal=False) + full_url, video_id, mpd_id=mpd_id, fatal=False, headers=_headers) else: is_plain_url = True formats = [{ @@ -3272,6 +3272,8 @@ class InfoExtractor: }) for f in media_info['formats']: f.setdefault('http_headers', {})['Referer'] = base_url + if _headers: + f['http_headers'].update(_headers) if media_info['formats'] or media_info['subtitles']: entries.append(media_info) return entries diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index e9f9357..cdf84c5 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -934,7 +934,7 @@ class TLCIE(DiscoveryPlusBaseIE): class DiscoveryPlusIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport)?' + DPlayBaseIE._PATH_REGEX + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:(?P<country>[a-z]{2})/)?video(?:/sport|/olympics)?' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family', 'info_dict': { @@ -958,6 +958,9 @@ class DiscoveryPlusIE(DiscoveryPlusBaseIE): }, { 'url': 'https://www.discoveryplus.com/gb/video/sport/eurosport-1-british-eurosport-1-british-sport/6-hours-of-spa-review', 'only_matching': True, + }, { + 'url': 'https://www.discoveryplus.com/gb/video/olympics/dplus-sport-dplus-sport-sport/rugby-sevens-australia-samoa', + 'only_matching': True, }] _PRODUCT = None diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py index 889548f..1c1b2a1 100644 --- a/yt_dlp/extractor/kick.py +++ b/yt_dlp/extractor/kick.py @@ -1,9 +1,14 @@ +import functools + from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( UserNotLive, + determine_ext, float_or_none, + int_or_none, merge_dicts, + parse_iso8601, str_or_none, traverse_obj, unified_timestamp, @@ -25,104 +30,192 @@ class KickBaseIE(InfoExtractor): def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs): return self._download_json( - f'https://kick.com/api/v1/{path}', display_id, note=note, + f'https://kick.com/api/{path}', display_id, note=note, headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs) class KickIE(KickBaseIE): + IE_NAME = 'kick:live' _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)' _TESTS = [{ - 'url': 'https://kick.com/yuppy', + 'url': 'https://kick.com/buddha', 'info_dict': { - 'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21', + 'id': '92722911-nopixel-40', 'ext': 'mp4', 'title': str, 'description': str, - 'channel': 'yuppy', - 'channel_id': '33538', - 'uploader': 'Yuppy', - 'uploader_id': '33793', - 'upload_date': str, - 'live_status': 'is_live', 'timestamp': int, - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': r're:https?://.+\.jpg', 'categories': list, + 'upload_date': str, + 'channel': 'buddha', + 'channel_id': '32807', + 'uploader': 'Buddha', + 'uploader_id': '33057', + 'live_status': 'is_live', + 'concurrent_view_count': int, + 'release_timestamp': int, + 'age_limit': 18, + 'release_date': str, }, - 'skip': 'livestream', + 'params': {'skip_download': 'livestream'}, + # 'skip': 'livestream', }, { - 'url': 'https://kick.com/kmack710', + 'url': 'https://kick.com/xqc', 'only_matching': True, }] + @classmethod + def suitable(cls, url): + return False if KickClipIE.suitable(url) else super().suitable(url) + def _real_extract(self, url): channel = self._match_id(url) - response = self._call_api(f'channels/{channel}', channel) + response = self._call_api(f'v2/channels/{channel}', channel) if not traverse_obj(response, 'livestream', expected_type=dict): raise UserNotLive(video_id=channel) return { - 'id': str(traverse_obj( - response, ('livestream', ('slug', 'id')), get_all=False, default=channel)), - 'formats': self._extract_m3u8_formats( - response['playback_url'], channel, 'mp4', live=True), - 'title': traverse_obj( - response, ('livestream', ('session_title', 'slug')), get_all=False, default=''), - 'description': traverse_obj(response, ('user', 'bio')), 'channel': channel, - 'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))), - 'uploader': traverse_obj(response, 'name', ('user', 'username')), - 'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))), 'is_live': True, - 'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))), - 'thumbnail': traverse_obj( - response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none), - 'categories': traverse_obj(response, ('recent_categories', ..., 'name')), + 'formats': self._extract_m3u8_formats(response['playback_url'], channel, 'mp4', live=True), + **traverse_obj(response, { + 'id': ('livestream', 'slug', {str}), + 'title': ('livestream', 'session_title', {str}), + 'description': ('user', 'bio', {str}), + 'channel_id': (('id', ('livestream', 'channel_id')), {int}, {str_or_none}, any), + 'uploader': (('name', ('user', 'username')), {str}, any), + 'uploader_id': (('user_id', ('user', 'id')), {int}, {str_or_none}, any), + 'timestamp': ('livestream', 'created_at', {unified_timestamp}), + 'release_timestamp': ('livestream', 'start_time', {unified_timestamp}), + 'thumbnail': ('livestream', 'thumbnail', 'url', {url_or_none}), + 'categories': ('recent_categories', ..., 'name', {str}), + 'concurrent_view_count': ('livestream', 'viewer_count', {int_or_none}), + 'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}), + }), } class KickVODIE(KickBaseIE): + IE_NAME = 'kick:vod' _VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' _TESTS = [{ - 'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3', + 'url': 'https://kick.com/video/e74614f4-5270-4319-90ad-32179f19a45c', 'md5': '3870f94153e40e7121a6e46c068b70cb', 'info_dict': { - 'id': '58bac65b-e641-4476-a7ba-3707a35e60e3', + 'id': 'e74614f4-5270-4319-90ad-32179f19a45c', 'ext': 'mp4', - 'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠', - 'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d', - 'channel': 'jaredfps', - 'channel_id': '26608', - 'uploader': 'JaredFPS', - 'uploader_id': '26799', - 'upload_date': '20240402', - 'timestamp': 1712097108, - 'duration': 33859.0, + 'title': r're:❎ MEGA DRAMA ❎ LIVE ❎ CLICK ❎ ULTIMATE SKILLS .+', + 'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.', + 'channel': 'xqc', + 'channel_id': '668', + 'uploader': 'xQc', + 'uploader_id': '676', + 'upload_date': '20240724', + 'timestamp': 1721796562, + 'duration': 18566.0, 'thumbnail': r're:^https?://.*\.jpg', - 'categories': ['Call of Duty: Warzone'], + 'view_count': int, + 'categories': ['VALORANT'], + 'age_limit': 0, }, - 'params': { - 'skip_download': 'm3u8', - }, - 'expected_warnings': [r'impersonation'], + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): video_id = self._match_id(url) - response = self._call_api(f'video/{video_id}', video_id) + response = self._call_api(f'v1/video/{video_id}', video_id) return { 'id': video_id, 'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'), - 'title': traverse_obj( - response, ('livestream', ('session_title', 'slug')), get_all=False, default=''), - 'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')), - 'channel': traverse_obj(response, ('livestream', 'channel', 'slug')), - 'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))), - 'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')), - 'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))), - 'timestamp': unified_timestamp(response.get('created_at')), - 'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000), - 'thumbnail': traverse_obj( - response, ('livestream', 'thumbnail'), expected_type=url_or_none), - 'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')), + **traverse_obj(response, { + 'title': ('livestream', ('session_title', 'slug'), {str}, any), + 'description': ('livestream', 'channel', 'user', 'bio', {str}), + 'channel': ('livestream', 'channel', 'slug', {str}), + 'channel_id': ('livestream', 'channel', 'id', {int}, {str_or_none}), + 'uploader': ('livestream', 'channel', 'user', 'username', {str}), + 'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}), + 'timestamp': ('created_at', {parse_iso8601}), + 'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}), + 'thumbnail': ('livestream', 'thumbnail', {url_or_none}), + 'categories': ('livestream', 'categories', ..., 'name', {str}), + 'view_count': ('views', {int_or_none}), + 'age_limit': ('livestream', 'is_mature', {bool}, {lambda x: 18 if x else 0}), + }), + } + + +class KickClipIE(KickBaseIE): + IE_NAME = 'kick:clips' + _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/?\?(?:[^#]+&)?clip=(?P<id>clip_[\w-]+)' + _TESTS = [{ + 'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X', + 'info_dict': { + 'id': 'clip_01GYXVB5Y8PWAPWCWMSBCFB05X', + 'ext': 'mp4', + 'title': 'Maddy detains Abd D:', + 'channel': 'mxddy', + 'channel_id': '133789', + 'uploader': 'AbdCreates', + 'uploader_id': '3309077', + 'thumbnail': r're:^https?://.*\.jpeg', + 'duration': 35, + 'timestamp': 1682481453, + 'upload_date': '20230426', + 'view_count': int, + 'like_count': int, + 'categories': ['VALORANT'], + 'age_limit': 18, + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://kick.com/destiny?clip=clip_01H9SKET879NE7N9RJRRDS98J3', + 'info_dict': { + 'id': 'clip_01H9SKET879NE7N9RJRRDS98J3', + 'title': 'W jews', + 'ext': 'mp4', + 'channel': 'destiny', + 'channel_id': '1772249', + 'uploader': 'punished_furry', + 'uploader_id': '2027722', + 'duration': 49.0, + 'upload_date': '20230908', + 'timestamp': 1694150180, + 'thumbnail': 'https://clips.kick.com/clips/j3/clip_01H9SKET879NE7N9RJRRDS98J3/thumbnail.png', + 'view_count': int, + 'like_count': int, + 'categories': ['Just Chatting'], + 'age_limit': 0, + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + clip_id = self._match_id(url) + clip = self._call_api(f'v2/clips/{clip_id}/play', clip_id)['clip'] + clip_url = clip['clip_url'] + + if determine_ext(clip_url) == 'm3u8': + formats = self._extract_m3u8_formats(clip_url, clip_id, 'mp4') + else: + formats = [{'url': clip_url}] + + return { + 'id': clip_id, + 'formats': formats, + **traverse_obj(clip, { + 'title': ('title', {str}), + 'channel': ('channel', 'slug', {str}), + 'channel_id': ('channel', 'id', {int}, {str_or_none}), + 'uploader': ('creator', 'username', {str}), + 'uploader_id': ('creator', 'id', {int}, {str_or_none}), + 'thumbnail': ('thumbnail_url', {url_or_none}), + 'duration': ('duration', {float_or_none}), + 'categories': ('category', 'name', {str}, all), + 'timestamp': ('created_at', {parse_iso8601}), + 'view_count': ('views', {int_or_none}), + 'like_count': ('likes', {int_or_none}), + 'age_limit': ('is_mature', {bool}, {lambda x: 18 if x else 0}), + }), } diff --git a/yt_dlp/extractor/learningonscreen.py b/yt_dlp/extractor/learningonscreen.py new file mode 100644 index 0000000..dcf8314 --- /dev/null +++ b/yt_dlp/extractor/learningonscreen.py @@ -0,0 +1,78 @@ +import functools +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + extract_attributes, + get_element_by_class, + get_element_html_by_id, + join_nonempty, + parse_duration, + unified_timestamp, +) +from ..utils.traversal import traverse_obj + + +class LearningOnScreenIE(InfoExtractor): + _VALID_URL = r'https?://learningonscreen\.ac\.uk/ondemand/index\.php/prog/(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://learningonscreen.ac.uk/ondemand/index.php/prog/005D81B2?bcast=22757013', + 'info_dict': { + 'id': '005D81B2', + 'ext': 'mp4', + 'title': 'Planet Earth', + 'duration': 3600.0, + 'timestamp': 1164567600.0, + 'upload_date': '20061126', + 'thumbnail': 'https://stream.learningonscreen.ac.uk/trilt-cover-images/005D81B2-Planet-Earth-2006-11-26T190000Z-BBC4.jpg', + }, + }] + + def _real_initialize(self): + if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'): + self.raise_login_required( + 'Use --cookies for authentication. See ' + ' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp ' + 'for how to manually pass cookies', method=None) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + details = traverse_obj(webpage, ( + {functools.partial(get_element_html_by_id, 'programme-details')}, { + 'title': ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html}), + 'timestamp': ( + {functools.partial(get_element_by_class, 'broadcast-date')}, + {functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}), + 'duration': ( + {functools.partial(get_element_by_class, 'prog-running-time')}, + {clean_html}, {parse_duration}), + })) + + title = details.pop('title', None) or traverse_obj(webpage, ( + {functools.partial(get_element_html_by_id, 'add-to-existing-playlist')}, + {extract_attributes}, 'data-record-title', {clean_html})) + + entries = self._parse_html5_media_entries( + 'https://stream.learningonscreen.ac.uk', webpage, video_id, m3u8_id='hls', mpd_id='dash', + _headers={'Origin': 'https://learningonscreen.ac.uk', 'Referer': 'https://learningonscreen.ac.uk/'}) + if not entries: + raise ExtractorError('No video found') + + if len(entries) > 1: + duration = details.pop('duration', None) + for idx, entry in enumerate(entries, start=1): + entry.update(details) + entry['id'] = join_nonempty(video_id, idx) + entry['title'] = join_nonempty(title, idx) + return self.playlist_result(entries, video_id, title, duration=duration) + + return { + **entries[0], + **details, + 'id': video_id, + 'title': title, + } diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index bd1a27f..f513420 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -133,7 +133,9 @@ class MediaKlikkIE(InfoExtractor): r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None)) player_data['video'] = player_data.pop('token') - player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data) + player_page = self._download_webpage( + 'https://player.mediaklikk.hu/playernew/player.php', video_id, + query=player_data, headers={'Referer': url}) player_json = self._search_json( r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);') playlist_url = traverse_obj( diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py index 230c218..935bf85 100644 --- a/yt_dlp/extractor/mlb.py +++ b/yt_dlp/extractor/mlb.py @@ -290,9 +290,18 @@ class MLBTVIE(InfoExtractor): 'release_date': '20220702', 'release_timestamp': 1656792300, }, - 'params': { - 'skip_download': True, + 'params': {'skip_download': 'm3u8'}, + }, { + # makeup game: has multiple dates, need to avoid games with 'rescheduleDate' + 'url': 'https://www.mlb.com/tv/g747039/vd22541c4-5a29-45f7-822b-635ec041cf5e', + 'info_dict': { + 'id': '747039', + 'ext': 'mp4', + 'title': '2024-07-29 - Toronto Blue Jays @ Baltimore Orioles', + 'release_date': '20240729', + 'release_timestamp': 1722280200, }, + 'params': {'skip_download': 'm3u8'}, }] _GRAPHQL_INIT_QUERY = '''\ mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) { @@ -463,11 +472,14 @@ mutation initPlaybackSession( def _real_extract(self, url): video_id = self._match_id(url) - metadata = traverse_obj(self._download_json( + data = self._download_json( 'https://statsapi.mlb.com/api/v1/schedule', video_id, query={ 'gamePk': video_id, 'hydrate': 'broadcasts(all),statusFlags', - }), ('dates', ..., 'games', lambda _, v: str(v['gamePk']) == video_id and v['broadcasts'], any)) + }) + metadata = traverse_obj(data, ( + 'dates', ..., 'games', + lambda _, v: str(v['gamePk']) == video_id and not v.get('rescheduleDate'), any)) broadcasts = traverse_obj(metadata, ( 'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF')) diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py index becf052..a50c510 100644 --- a/yt_dlp/extractor/olympics.py +++ b/yt_dlp/extractor/olympics.py @@ -1,9 +1,17 @@ from .common import InfoExtractor -from ..utils import int_or_none, try_get +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, + try_get, + url_or_none, +) +from ..utils.traversal import traverse_obj class OlympicsReplayIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P<id>[^/#&?]+)' + _VALID_URL = r'https?://(?:www\.)?olympics\.com/[a-z]{2}/(?:paris-2024/)?(?:replay|videos?|original-series/episode)/(?P<id>[\w-]+)' _TESTS = [{ 'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays', 'info_dict': { @@ -11,26 +19,98 @@ class OlympicsReplayIE(InfoExtractor): 'ext': 'mp4', 'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020', 'upload_date': '20210801', - 'timestamp': 1627783200, + 'timestamp': 1627797600, 'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3', - 'uploader': 'International Olympic Committee', + 'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/nua4o7zwyaznoaejpbk2', + 'duration': 7017.0, }, 'params': { 'skip_download': True, }, }, { - 'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp', - 'only_matching': True, + 'url': 'https://olympics.com/en/original-series/episode/b-boys-and-b-girls-take-the-spotlight-breaking-life-road-to-paris-2024', + 'info_dict': { + 'id': '32633650-c5ee-4280-8b94-fb6defb6a9b5', + 'ext': 'mp4', + 'title': 'B-girl Nicka - Breaking Life, Road to Paris 2024 | Episode 1', + 'upload_date': '20240517', + 'timestamp': 1715948200, + 'description': 'md5:f63d728a41270ec628f6ac33ce471bb1', + 'thumbnail': 'https://img.olympics.com/images/image/private/t_1-1_1280/primary/a3j96l7j6so3vyfijby1', + 'duration': 1321.0, + }, + }, { + 'url': 'https://olympics.com/en/paris-2024/videos/men-s-preliminaries-gbr-esp-ned-rsa-hockey-olympic-games-paris-2024', + 'info_dict': { + 'id': '3d96db23-8eee-4b7c-8ef5-488a0361026c', + 'ext': 'mp4', + 'title': 'Men\'s Preliminaries GBR-ESP & NED-RSA | Hockey | Olympic Games Paris 2024', + 'upload_date': '20240727', + 'timestamp': 1722066600, + }, + 'skip': 'Geo-restricted to RU, BR, BT, NP, TM, BD, TL', + }, { + 'url': 'https://olympics.com/en/paris-2024/videos/dnp-suni-lee-i-have-goals-and-i-have-expectations-for-myself-but-i-also-am-trying-to-give-myself-grace', + 'info_dict': { + 'id': 'a42f37ab-8a74-41d0-a7d9-af27b7b02a90', + 'ext': 'mp4', + 'title': 'md5:c7cfbc9918636a98e66400a812e4d407', + 'upload_date': '20240729', + 'timestamp': 1722288600, + }, }] + _GEO_BYPASS = False + + def _extract_from_nextjs_data(self, webpage, video_id): + data = traverse_obj(self._search_nextjs_data(webpage, video_id, default={}), ( + 'props', 'pageProps', 'page', 'items', + lambda _, v: v['name'] == 'videoPlaylist', 'data', 'currentVideo', {dict}, any)) + if not data: + return None + + geo_countries = traverse_obj(data, ('countries', ..., {str})) + if traverse_obj(data, ('geoRestrictedVideo', {bool})): + self.raise_geo_restricted(countries=geo_countries) + + is_live = traverse_obj(data, ('streamingStatus', {str})) == 'LIVE' + m3u8_url = traverse_obj(data, ('videoUrl', {url_or_none})) or data['streamUrl'] + tokenized_url = m3u8_url if is_live else self._tokenize_url(m3u8_url, video_id) + + try: + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + tokenized_url, video_id, 'mp4', m3u8_id='hls') + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and 'georestricted' in e.cause.msg: + self.raise_geo_restricted(countries=geo_countries) + raise + + return { + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + **traverse_obj(data, { + 'id': ('videoID', {str}), + 'title': ('title', {str}), + 'timestamp': ('contentDate', {parse_iso8601}), + }), + } + + def _tokenize_url(self, url, video_id): + return self._download_json( + 'https://olympics.com/tokenGenerator', video_id, + 'Downloading tokenized m3u8 url', query={'url': url}) def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + + if info := self._extract_from_nextjs_data(webpage, video_id): + return info + title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage) - uuid = self._html_search_meta('episode_uid', webpage) + video_uuid = self._html_search_meta('episode_uid', webpage) m3u8_url = self._html_search_meta('video_url', webpage) - json_ld = self._search_json_ld(webpage, uuid) + json_ld = self._search_json_ld(webpage, video_uuid) thumbnails_list = json_ld.get('image') if not thumbnails_list: thumbnails_list = self._html_search_regex( @@ -48,12 +128,12 @@ class OlympicsReplayIE(InfoExtractor): 'width': width, 'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)), }) - m3u8_url = self._download_json( - f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url') - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls') + + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + self._tokenize_url(m3u8_url, video_uuid), video_uuid, 'mp4', m3u8_id='hls') return { - 'id': uuid, + 'id': video_uuid, 'title': title, 'thumbnails': thumbnails, 'formats': formats, diff --git a/yt_dlp/extractor/tva.py b/yt_dlp/extractor/tva.py index e3e1055..d702640 100644 --- a/yt_dlp/extractor/tva.py +++ b/yt_dlp/extractor/tva.py @@ -1,60 +1,29 @@ import functools import re +from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none from ..utils.traversal import traverse_obj class TVAIE(InfoExtractor): - _VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)' + IE_NAME = 'tvaplus' + IE_DESC = 'TVA+' + _VALID_URL = r'https?://(?:www\.)?tvaplus\.ca/(?:[^/?#]+/)*[\w-]+-(?P<id>\d+)(?:$|[#?])' _TESTS = [{ - 'url': 'https://videos.tva.ca/details/_5596811470001', - 'info_dict': { - 'id': '5596811470001', - 'ext': 'mp4', - 'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !', - 'uploader_id': '5481942443001', - 'upload_date': '20171003', - 'timestamp': 1507064617, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'skip': 'HTTP Error 404: Not Found', - }, { - 'url': 'https://video.tva.ca/details/_5596811470001', - 'only_matching': True, - }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s' - - def _real_extract(self, url): - video_id = self._match_id(url) - - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}), - 'ie_key': 'BrightcoveNew', - } - - -class QubIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)' - _TESTS = [{ - 'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619', + 'url': 'https://www.tvaplus.ca/tva/alerte-amber/saison-1/episode-01-1000036619', 'md5': '949490fd0e7aee11d0543777611fbd53', 'info_dict': { 'id': '6084352463001', 'ext': 'mp4', - 'title': 'Ép 01. Mon dernier jour', + 'title': 'Mon dernier jour', 'uploader_id': '5481942443001', 'upload_date': '20190907', 'timestamp': 1567899756, 'description': 'md5:9c0d7fbb90939420c651fd977df90145', 'thumbnail': r're:https://.+\.jpg', - 'episode': 'Ép 01. Mon dernier jour', + 'episode': 'Mon dernier jour', 'episode_number': 1, 'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'], 'duration': 2625.963, @@ -64,23 +33,36 @@ class QubIE(InfoExtractor): 'channel': 'TVA', }, }, { - 'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943', - 'only_matching': True, + 'url': 'https://www.tvaplus.ca/tva/le-baiser-du-barbu/le-baiser-du-barbu-886644190', + 'info_dict': { + 'id': '6354448043112', + 'ext': 'mp4', + 'title': 'Le Baiser du barbu', + 'uploader_id': '5481942443001', + 'upload_date': '20240606', + 'timestamp': 1717694023, + 'description': 'md5:025b1219086c1cbf4bc27e4e034e8b57', + 'thumbnail': r're:https://.+\.jpg', + 'episode': 'Le Baiser du barbu', + 'tags': ['fullepisode', 'films'], + 'duration': 6053.504, + 'series': 'Le Baiser du barbu', + 'channel': 'TVA', + }, }] - # reference_id also works with old account_id(5481942443001) - # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s' + _BC_URL_TMPL = 'https://players.brightcove.net/5481942443001/default_default/index.html?videoId={}' def _real_extract(self, url): entity_id = self._match_id(url) webpage = self._download_webpage(url, entity_id) - entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData'] + entity = self._search_nextjs_data(webpage, entity_id)['props']['pageProps']['staticEntity'] video_id = entity['videoId'] episode = strip_or_none(entity.get('name')) return { '_type': 'url_transparent', - 'url': f'https://videos.tva.ca/details/_{video_id}', - 'ie_key': TVAIE.ie_key(), + 'url': smuggle_url(self._BC_URL_TMPL.format(video_id), {'geo_countries': ['CA']}), + 'ie_key': BrightcoveNewIE.ie_key(), 'id': video_id, 'title': episode, 'episode': episode, diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index 8105db4..c13832c 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -10,7 +10,7 @@ from ..utils import ( class TVerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'skip': 'videos are only available for 7 days', 'url': 'https://tver.jp/episodes/ep83nf3w4p', @@ -24,6 +24,20 @@ class TVerIE(InfoExtractor): }, 'add_ie': ['BrightcoveNew'], }, { + 'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/', + 'info_dict': { + 'id': '6359578055112', + 'ext': 'mp4', + 'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」', + 'timestamp': 1722279928, + 'upload_date': '20240729', + 'tags': ['20240729', 'japanese', 'japanmedal', 'paris'], + 'uploader_id': '4774017240001', + 'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg', + 'duration': 670.571, + }, + 'params': {'skip_download': 'm3u8'}, + }, { 'url': 'https://tver.jp/corner/f0103888', 'only_matching': True, }, { @@ -47,7 +61,15 @@ class TVerIE(InfoExtractor): def _real_extract(self, url): video_id, video_type = self._match_valid_url(url).group('id', 'type') - if video_type not in {'series', 'episodes'}: + + if video_type == 'olympic/paris2024/video': + # Player ID is taken from .content.brightcove.E200.pro.pc.account_id: + # https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d= + return self.url_result(smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id), + {'geo_countries': ['JP']}), 'BrightcoveNew') + + elif video_type not in {'series', 'episodes'}: webpage = self._download_webpage(url, video_id, note='Resolving to new URL') video_id = self._match_id(self._search_regex( (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'), diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 1e2d118..8b7ec1d 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -49,6 +49,7 @@ class KnownDRMIE(UnsupportedInfoExtractor): r'amazon\.(?:\w{2}\.)?\w+/gp/video', r'music\.amazon\.(?:\w{2}\.)?\w+', r'(?:watch|front)\.njpwworld\.com', + r'qub\.ca/vrai', ) _TESTS = [{ @@ -149,6 +150,9 @@ class KnownDRMIE(UnsupportedInfoExtractor): }, { 'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs', 'only_matching': True, + }, { + 'url': 'https://www.qub.ca/vrai/l-effet-bocuse-d-or/saison-1/l-effet-bocuse-d-or-saison-1-bande-annonce-1098225063', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index d10689c..a20cf4b 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -1267,7 +1267,7 @@ class VimeoGroupsIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE class VimeoReviewIE(VimeoBaseInfoExtractor): IE_NAME = 'vimeo:review' IE_DESC = 'Review pages on vimeo' - _VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})' + _VALID_URL = r'https?://vimeo\.com/(?P<user>[^/?#]+)/review/(?P<id>\d+)/(?P<hash>[\da-f]{10})' _TESTS = [{ 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 'md5': 'c507a72f780cacc12b2248bb4006d253', @@ -1313,26 +1313,22 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): }] def _real_extract(self, url): - page_url, video_id = self._match_valid_url(url).groups() - data = self._download_json( - page_url.replace('/review/', '/review/data/'), video_id) + user, video_id, review_hash = self._match_valid_url(url).group('user', 'id', 'hash') + data_url = f'https://vimeo.com/{user}/review/data/{video_id}/{review_hash}' + data = self._download_json(data_url, video_id) if data.get('isLocked') is True: video_password = self._get_video_password() viewer = self._download_json( 'https://vimeo.com/_rv/viewer', video_id) - webpage = self._verify_video_password(video_id, video_password, viewer['xsrft']) - clip_page_config = self._parse_json(self._search_regex( - r'window\.vimeo\.clip_page_config\s*=\s*({.+?});', - webpage, 'clip page config'), video_id) - config_url = clip_page_config['player']['config_url'] - clip_data = clip_page_config.get('clip') or {} - else: - clip_data = data['clipData'] - config_url = clip_data['configUrl'] + self._verify_video_password(video_id, video_password, viewer['xsrft']) + data = self._download_json(data_url, video_id) + clip_data = data['clipData'] + config_url = clip_data['configUrl'] config = self._download_json(config_url, video_id) info_dict = self._parse_config(config, video_id) source_format = self._extract_original_format( - page_url + '/action', video_id) + f'https://vimeo.com/{user}/review/{video_id}/{review_hash}/action', video_id, + unlisted_hash=traverse_obj(config_url, ({parse_qs}, 'h', -1))) if source_format: info_dict['formats'].append(source_format) info_dict['description'] = clean_html(clip_data.get('description')) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7364e8a..88e1a28 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -72,133 +72,169 @@ STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client' # any clients starting with _ cannot be explicitly requested by the user INNERTUBE_CLIENTS = { 'web': { - 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB', - 'clientVersion': '2.20220801.00.00', + 'clientVersion': '2.20240726.00.00', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, + }, + # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats + 'web_safari': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'WEB', + 'clientVersion': '2.20240726.00.00', + 'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, }, 'web_embedded': { - 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB_EMBEDDED_PLAYER', - 'clientVersion': '1.20220731.00.00', + 'clientVersion': '1.20240723.01.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 56, }, 'web_music': { - 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', 'INNERTUBE_HOST': 'music.youtube.com', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB_REMIX', - 'clientVersion': '1.20220727.01.00', + 'clientVersion': '1.20240724.00.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, }, 'web_creator': { - 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB_CREATOR', - 'clientVersion': '1.20220726.00.00', + 'clientVersion': '1.20240723.03.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, }, 'android': { - 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID', - 'clientVersion': '19.09.37', + 'clientVersion': '19.29.37', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.youtube/19.29.37 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, 'REQUIRE_JS_PLAYER': False, }, - 'android_embedded': { - 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw', + 'android_music': { 'INNERTUBE_CONTEXT': { 'client': { - 'clientName': 'ANDROID_EMBEDDED_PLAYER', - 'clientVersion': '19.09.37', + 'clientName': 'ANDROID_MUSIC', + 'clientVersion': '7.11.50', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.apps.youtube.music/7.11.50 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 55, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, 'REQUIRE_JS_PLAYER': False, }, - 'android_music': { - 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI', + 'android_creator': { 'INNERTUBE_CONTEXT': { 'client': { - 'clientName': 'ANDROID_MUSIC', - 'clientVersion': '6.42.52', + 'clientName': 'ANDROID_CREATOR', + 'clientVersion': '24.30.100', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.apps.youtube.creator/24.30.100 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, 'REQUIRE_JS_PLAYER': False, }, - 'android_creator': { - 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8', + # YouTube Kids videos aren't returned on this client for some reason + 'android_vr': { 'INNERTUBE_CONTEXT': { 'client': { - 'clientName': 'ANDROID_CREATOR', - 'clientVersion': '22.30.100', + 'clientName': 'ANDROID_VR', + 'clientVersion': '1.57.29', + 'deviceMake': 'Oculus', + 'deviceModel': 'Quest 3', + 'androidSdkVersion': 32, + 'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.57.29 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip', + 'osName': 'Android', + 'osVersion': '12L', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 28, + 'REQUIRE_JS_PLAYER': False, + }, + 'android_testsuite': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'ANDROID_TESTSUITE', + 'clientVersion': '1.9', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 30, 'REQUIRE_JS_PLAYER': False, + 'PLAYER_PARAMS': '2AMB', }, - # iOS clients have HLS live streams. Setting device model to get 60fps formats. - # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 - 'ios': { - 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc', + # This client only has legacy formats and storyboards + 'android_producer': { 'INNERTUBE_CONTEXT': { 'client': { - 'clientName': 'IOS', - 'clientVersion': '19.09.3', - 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', + 'clientName': 'ANDROID_PRODUCER', + 'clientVersion': '0.111.1', + 'androidSdkVersion': 30, + 'userAgent': 'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip', + 'osName': 'Android', + 'osVersion': '11', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 91, 'REQUIRE_JS_PLAYER': False, }, - 'ios_embedded': { + # iOS clients have HLS live streams. Setting device model to get 60fps formats. + # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 + 'ios': { 'INNERTUBE_CONTEXT': { 'client': { - 'clientName': 'IOS_MESSAGES_EXTENSION', - 'clientVersion': '19.09.3', - 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', + 'clientName': 'IOS', + 'clientVersion': '19.29.1', + 'deviceMake': 'Apple', + 'deviceModel': 'iPhone16,2', + 'userAgent': 'com.google.ios.youtube/19.29.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', + 'osName': 'iPhone', + 'osVersion': '17.5.1.21F90', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 66, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'REQUIRE_JS_PLAYER': False, }, 'ios_music': { - 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS_MUSIC', - 'clientVersion': '6.33.3', - 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', + 'clientVersion': '7.08.2', + 'deviceMake': 'Apple', + 'deviceModel': 'iPhone16,2', + 'userAgent': 'com.google.ios.youtubemusic/7.08.2 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', + 'osName': 'iPhone', + 'osVersion': '17.5.1.21F90', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, @@ -208,9 +244,12 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS_CREATOR', - 'clientVersion': '22.33.101', - 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', + 'clientVersion': '24.30.100', + 'deviceMake': 'Apple', + 'deviceModel': 'iPhone16,2', + 'userAgent': 'com.google.ios.ytcreator/24.30.100 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', + 'osName': 'iPhone', + 'osVersion': '17.5.1.21F90', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 15, @@ -219,19 +258,26 @@ INNERTUBE_CLIENTS = { # mweb has 'ultralow' formats # See: https://github.com/yt-dlp/yt-dlp/pull/557 'mweb': { - 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'MWEB', - 'clientVersion': '2.20220801.00.00', + 'clientVersion': '2.20240726.01.00', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, }, + 'tv': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'TVHTML5', + 'clientVersion': '7.20240724.13.00', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, + }, # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) # See: https://github.com/zerodytrash/YouTube-Internal-Clients 'tv_embedded': { - 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', @@ -249,6 +295,7 @@ INNERTUBE_CLIENTS = { }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 95, + 'REQUIRE_JS_PLAYER': False, }, } @@ -262,7 +309,7 @@ def _split_innertube_client(client_name): def short_client_name(client_name): - main, *parts = _split_innertube_client(client_name)[0].replace('embedscreen', 'e_s').split('_') + main, *parts = _split_innertube_client(client_name)[0].split('_') return join_nonempty(main[:4], ''.join(x[0] for x in parts)).upper() @@ -274,23 +321,18 @@ def build_innertube_clients(): priority = qualities(BASE_CLIENTS[::-1]) for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): - ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8') ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com') ytcfg.setdefault('REQUIRE_JS_PLAYER', True) + ytcfg.setdefault('PLAYER_PARAMS', None) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') _, base_client, variant = _split_innertube_client(client) ytcfg['priority'] = 10 * priority(base_client) - if not variant: - INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg) - embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED' - embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY - embedscreen['priority'] -= 3 - elif variant == 'embedded': + if variant == 'embedded': ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY ytcfg['priority'] -= 2 - else: + elif variant: ytcfg['priority'] -= 3 @@ -566,9 +608,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0] or req_api_hostname or self._get_innertube_host(default_client or 'web')) - def _extract_api_key(self, ytcfg=None, default_client='web'): - return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client) - def _extract_context(self, ytcfg=None, default_client='web'): context = get_first( (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict) @@ -614,13 +653,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor): real_headers.update({'content-type': 'application/json'}) if headers: real_headers.update(headers) - api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0] - or api_key or self._extract_api_key(default_client=default_client)) return self._download_json( f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}', video_id=video_id, fatal=fatal, note=note, errnote=errnote, data=json.dumps(data).encode('utf8'), headers=real_headers, - query={'key': api_key, 'prettyPrint': 'false'}) + query=filter_dict({ + 'key': self._configuration_arg( + 'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0], + 'prettyPrint': 'false', + }, cndn=lambda _, v: v)) def extract_yt_initial_data(self, item_id, webpage, fatal=True): return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal) @@ -972,7 +1013,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor): ep=ep, fatal=True, headers=headers, video_id=item_id, query=query, note=note, context=self._extract_context(ytcfg, default_client), - api_key=self._extract_api_key(ytcfg, default_client), api_hostname=api_hostname, default_client=default_client) except ExtractorError as e: if not isinstance(e.cause, network_exceptions): @@ -1295,6 +1335,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') _POTOKEN_EXPERIMENTS = ('51217476', '51217102') + _BROKEN_CLIENTS = { + short_client_name(client): client + for client in ('android', 'android_creator', 'android_music') + } _GEO_BYPASS = False @@ -3129,19 +3173,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.write_debug(f'Decrypted nsig {s} => {ret}') return ret - def _extract_n_function_name(self, jscode): + def _extract_n_function_name(self, jscode, player_url=None): + # Examples (with placeholders nfunc, narray, idx): + # * .get("n"))&&(b=nfunc(b) + # * .get("n"))&&(b=narray[idx](b) + # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c) + # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("") + # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("") funcname, idx = self._search_regex( r'''(?x) (?: \.get\("n"\)\)&&\(b=| (?: b=String\.fromCharCode\(110\)| - ([a-zA-Z0-9$.]+)&&\(b="nn"\[\+\1\] - ),c=a\.get\(b\)\)&&\(c= - ) - (?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''', - jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) - if not idx: + (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\] + ),c=a\.get\(b\)\)&&\(c=| + \b(?P<var>[a-zA-Z0-9_$]+)= + )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\) + (?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''', + jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None)) + if not funcname: + self.report_warning(join_nonempty( + 'Falling back to generic n function search', + player_url and f' player = {player_url}', delim='\n')) + return self._search_regex( + r'''(?xs) + ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\) + \s*\{(?:(?!};).)+?["']enhanced_except_''', + jscode, 'Initial JS player n function name', group='name') + elif not idx: return funcname return json.loads(js_to_json(self._search_regex( @@ -3157,7 +3217,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if func_code: return jsi, player_id, func_code - func_name = self._extract_n_function_name(jscode) + func_name = self._extract_n_function_name(jscode, player_url=player_url) func_code = jsi.extract_function_code(func_name) @@ -3661,9 +3721,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'videoId': video_id, } - pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0] - if pp_arg: - yt_query['params'] = pp_arg + default_pp = traverse_obj( + INNERTUBE_CLIENTS, (_split_innertube_client(client)[0], 'PLAYER_PARAMS', {str})) + if player_params := self._configuration_arg('player_params', [default_pp], casesense=True)[0]: + yt_query['params'] = player_params yt_query.update(self._generate_player_context(sts)) return self._extract_response( @@ -3675,8 +3736,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_requested_clients(self, url, smuggled_data): requested_clients = [] - android_clients = [] - default = ['ios', 'web'] + broken_clients = [] + default = ['ios', 'tv'] allowed_clients = sorted( (client for client in INNERTUBE_CLIENTS if client[:1] != '_'), key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) @@ -3687,18 +3748,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): requested_clients.extend(allowed_clients) elif client not in allowed_clients: self.report_warning(f'Skipping unsupported client {client}') - elif client.startswith('android'): - android_clients.append(client) + elif client in self._BROKEN_CLIENTS.values(): + broken_clients.append(client) else: requested_clients.append(client) - # Force deprioritization of broken Android clients for format de-duplication - requested_clients.extend(android_clients) + # Force deprioritization of _BROKEN_CLIENTS for format de-duplication + requested_clients.extend(broken_clients) if not requested_clients: requested_clients = default if smuggled_data.get('is_music_url') or self.is_music_url(url): - requested_clients.extend( - f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS) + for requested_client in requested_clients: + _, base_client, variant = _split_innertube_client(requested_client) + music_client = f'{base_client}_music' + if variant != 'music' and music_client in INNERTUBE_CLIENTS: + requested_clients.append(music_client) return orderedSet(requested_clients) @@ -3792,14 +3856,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f[STREAMING_DATA_CLIENT_NAME] = name prs.append(pr) - # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in - if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated: - append_client(f'{base_client}_creator') - elif self._is_agegated(pr): - if variant == 'tv_embedded': - append_client(f'{base_client}_embedded') - elif not variant: - append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded') + # tv_embedded can work around age-gate and age-verification IF the video is embeddable + if self._is_agegated(pr) and variant != 'tv_embedded': + append_client(f'tv_embedded.{base_client}') + + # Unauthenticated users will only get tv_embedded client formats if age-gated + if self._is_agegated(pr) and not self.is_authenticated: + self.to_screen( + f'{video_id}: This video is age-restricted; some formats may be missing ' + f'without authentication. {self._login_hint()}', only_once=True) + + # EU countries require age-verification for accounts to access age-restricted videos + # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients + # If embedding is disabled for the video, _is_unplayable() will be truthy for tv_embedded + embedding_is_disabled = variant == 'tv_embedded' and self._is_unplayable(pr) + if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled): + self.to_screen( + f'{video_id}: This video is age-restricted and YouTube is requiring ' + 'account age-verification; some formats may be missing', only_once=True) + # web_creator and mediaconnect can work around the age-verification requirement + # _producer, _testsuite, & _vr variants can also work around age-verification + append_client('web_creator', 'mediaconnect') if skipped_clients: self.report_warning( @@ -3935,13 +4012,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) client_name = fmt.get(STREAMING_DATA_CLIENT_NAME) - # Android client formats are broken due to integrity check enforcement + # _BROKEN_CLIENTS return videoplayback URLs that expire after 30 seconds # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554 - is_broken = client_name and client_name.startswith(short_client_name('android')) + is_broken = client_name in self._BROKEN_CLIENTS if is_broken: self.report_warning( - f'{video_id}: Android client formats are broken and may yield HTTP Error 403. ' - 'They will be deprioritized', only_once=True) + f'{video_id}: {self._BROKEN_CLIENTS[client_name]} client formats are broken ' + 'and may yield HTTP Error 403. They will be deprioritized', only_once=True) name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or '' fps = int_or_none(fmt.get('fps')) or 0 diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 3e3b285..0d3e707 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1217,7 +1217,7 @@ def unified_timestamp(date_str, day_first=True): return None date_str = re.sub(r'\s+', ' ', re.sub( - r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str)) + r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str)) pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0 timezone, date_str = extract_timezone(date_str) diff --git a/yt_dlp/version.py b/yt_dlp/version.py index e641bf5..81d1c2c 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.07.25' +__version__ = '2024.08.01' -RELEASE_GIT_HEAD = 'f0993391e6052ec8f7aacc286609564f226943b9' +RELEASE_GIT_HEAD = 'ffd7781d6588926f820b44a34b9e6e3068fb9f97' VARIANT = None @@ -12,4 +12,4 @@ CHANNEL = 'stable' ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.07.25' +_pkg_version = '2024.08.01' |