diff options
Diffstat (limited to '')
-rw-r--r-- | CONTRIBUTORS | 4 | ||||
-rw-r--r-- | Changelog.md | 25 | ||||
-rw-r--r-- | devscripts/changelog_override.json | 5 | ||||
-rw-r--r-- | supportedsites.md | 3 | ||||
-rw-r--r-- | yt_dlp/extractor/_extractors.py | 5 | ||||
-rw-r--r-- | yt_dlp/extractor/abematv.py | 4 | ||||
-rw-r--r-- | yt_dlp/extractor/cellebrite.py | 71 | ||||
-rw-r--r-- | yt_dlp/extractor/chzzk.py | 37 | ||||
-rw-r--r-- | yt_dlp/extractor/douyutv.py | 6 | ||||
-rw-r--r-- | yt_dlp/extractor/jiosaavn.py | 9 | ||||
-rw-r--r-- | yt_dlp/extractor/swearnet.py | 64 | ||||
-rw-r--r-- | yt_dlp/extractor/toggle.py | 28 | ||||
-rw-r--r-- | yt_dlp/extractor/vidyard.py | 426 | ||||
-rw-r--r-- | yt_dlp/extractor/vimeo.py | 87 | ||||
-rw-r--r-- | yt_dlp/extractor/vtv.py | 108 | ||||
-rw-r--r-- | yt_dlp/extractor/yle_areena.py | 15 | ||||
-rw-r--r-- | yt_dlp/utils/_utils.py | 42 | ||||
-rw-r--r-- | yt_dlp/version.py | 6 |
18 files changed, 767 insertions, 178 deletions
diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 7d0c5bd..60e25d0 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -646,3 +646,7 @@ tippfehlr varunchopra DrakoCpp PatrykMis +DinhHuy2010 +exterrestris +harbhim +LeSuisse diff --git a/Changelog.md b/Changelog.md index b1eb6e3..b5a829d 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,31 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.07.07 + +#### Important changes +- Security: [[ie/douyutv] Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3v33-3wmw-3785) + - A dependency on potentially malicious third-party JavaScript code has been removed from the Douyu extractors + +#### Core changes +- [Address gaps in allowed extensions](https://github.com/yt-dlp/yt-dlp/commit/2469119490d7e0397ebbf5c5ae327316f955eef2) ([#10362](https://github.com/yt-dlp/yt-dlp/issues/10362)) by [bashonly](https://github.com/bashonly) +- [Fix `--ignore-no-formats-error`](https://github.com/yt-dlp/yt-dlp/commit/cc767e9490056efaaa11c186b0d032e4b4969180) ([#10345](https://github.com/yt-dlp/yt-dlp/issues/10345)) by [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- **abematv**: [Extract availability](https://github.com/yt-dlp/yt-dlp/commit/2a1a1b8e67e864289ac7ba5d05ec63dbb19a639f) ([#10348](https://github.com/yt-dlp/yt-dlp/issues/10348)) by [middlingphys](https://github.com/middlingphys) +- **chzzk**: [Extract with API v3](https://github.com/yt-dlp/yt-dlp/commit/4862a29854d4044120e3f97b52199711ad04bee1) ([#10363](https://github.com/yt-dlp/yt-dlp/issues/10363)) by [hui1601](https://github.com/hui1601) +- **douyutv**: [Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/commit/6075a029dba70a89675ae1250e7cdfd91f0eba41) ([#10347](https://github.com/yt-dlp/yt-dlp/issues/10347)) by [LeSuisse](https://github.com/LeSuisse) +- **jiosaavn**: playlist: [Support featured playlists](https://github.com/yt-dlp/yt-dlp/commit/f0f867f008a1728f5f6ac1224b9e014b5d27f817) ([#10382](https://github.com/yt-dlp/yt-dlp/issues/10382)) by [harbhim](https://github.com/harbhim) +- **vidyard**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/00766ece0c5c7a80781a4ff677198c5fb69d9dc0) ([#10155](https://github.com/yt-dlp/yt-dlp/issues/10155)) by [exterrestris](https://github.com/exterrestris) +- **vimeo**: [Fix password-protected video extraction](https://github.com/yt-dlp/yt-dlp/commit/c1c9bb4adb42d0d93a2fb5d93a7de0a87b6ba884) ([#10341](https://github.com/yt-dlp/yt-dlp/issues/10341)) by [bashonly](https://github.com/bashonly) +- **vtv**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/987a1f94c24275f2b0cd82e719956687415dd732) ([#10173](https://github.com/yt-dlp/yt-dlp/issues/10173)) by [DinhHuy2010](https://github.com/DinhHuy2010) +- **yle_areena** + - [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/4cdc976bd861b5835601ae402bef543eacd88f3d) ([#10380](https://github.com/yt-dlp/yt-dlp/issues/10380)) by [seproDev](https://github.com/seproDev) + - [Fix subtitle extraction](https://github.com/yt-dlp/yt-dlp/commit/0d174e8bed32081eb38ef7f5d1a1282ae154f517) ([#10379](https://github.com/yt-dlp/yt-dlp/issues/10379)) by [Grub4K](https://github.com/Grub4K) + +#### Misc. changes +- **cleanup**: Miscellaneous: [b337d29](https://github.com/yt-dlp/yt-dlp/commit/b337d2989ce0614651d363383f6f743d977248ef) by [bashonly](https://github.com/bashonly) + ### 2024.07.02 #### Core changes diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index ab42f55..5189de2 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -180,5 +180,10 @@ "action": "add", "when": "6aaf96a3d6e7d0d426e97e11a2fcf52fda00e733", "short": "[priority] Security: [[CVE-2024-38519](https://nvd.nist.gov/vuln/detail/CVE-2024-38519)] [Properly sanitize file-extension to prevent file system modification and RCE](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j)\n - Unsafe extensions are now blocked from being downloaded" + }, + { + "action": "add", + "when": "6075a029dba70a89675ae1250e7cdfd91f0eba41", + "short": "[priority] Security: [[ie/douyutv] Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3v33-3wmw-3785)\n - A dependency on potentially malicious third-party JavaScript code has been removed from the Douyu extractors" } ] diff --git a/supportedsites.md b/supportedsites.md index 15fc496..42543af 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -1618,6 +1618,7 @@ - **VidLii** - **Vidly** - **vids.io** + - **Vidyard** - **viewlift** - **viewlift:embed** - **Viidea** @@ -1665,6 +1666,8 @@ - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza - **VrtNU**: [*vrtnu*](## "netrc machine") VRT MAX - **VTM**: (**Currently broken**) + - **VTV** + - **VTVGo** - **VTXTV**: [*vtxtv*](## "netrc machine") - **VTXTVLive**: [*vtxtv*](## "netrc machine") - **VTXTVRecordings**: [*vtxtv*](## "netrc machine") diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 7f6507d..fc917ff 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2324,6 +2324,7 @@ from .vidio import ( ) from .vidlii import VidLiiIE from .vidly import VidlyIE +from .vidyard import VidyardIE from .viewlift import ( ViewLiftEmbedIE, ViewLiftIE, @@ -2389,6 +2390,10 @@ from .vrt import ( VrtNUIE, ) from .vtm import VTMIE +from .vtv import ( + VTVIE, + VTVGoIE, +) from .vuclip import VuClipIE from .vvvvid import ( VVVVIDIE, diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index 293a6c4..9471df1 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -368,6 +368,7 @@ class AbemaTVIE(AbemaTVBaseIE): info['episode_number'] = epis if epis < 2000 else None is_live, m3u8_url = False, None + availability = 'public' if video_type == 'now-on-air': is_live = True channel_url = 'https://api.abema.io/v1/channels' @@ -389,6 +390,7 @@ class AbemaTVIE(AbemaTVBaseIE): if 3 not in ondemand_types: # cannot acquire decryption key for these streams self.report_warning('This is a premium-only stream') + availability = 'premium_only' info.update(traverse_obj(api_response, { 'series': ('series', 'title'), 'season': ('season', 'name'), @@ -408,6 +410,7 @@ class AbemaTVIE(AbemaTVBaseIE): headers=headers) if not traverse_obj(api_response, ('slot', 'flags', 'timeshiftFree'), default=False): self.report_warning('This is a premium-only stream') + availability = 'premium_only' m3u8_url = f'https://vod-abematv.akamaized.net/slot/{video_id}/playlist.m3u8' else: @@ -425,6 +428,7 @@ class AbemaTVIE(AbemaTVBaseIE): 'description': description, 'formats': formats, 'is_live': is_live, + 'availability': availability, }) return info diff --git a/yt_dlp/extractor/cellebrite.py b/yt_dlp/extractor/cellebrite.py index e90365a..54367c4 100644 --- a/yt_dlp/extractor/cellebrite.py +++ b/yt_dlp/extractor/cellebrite.py @@ -1,63 +1,50 @@ -from .common import InfoExtractor -from ..utils import traverse_obj +from .vidyard import VidyardBaseIE, VidyardIE +from ..utils import ExtractorError, make_archive_id, url_basename -class CellebriteIE(InfoExtractor): +class CellebriteIE(VidyardBaseIE): _VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)' _TESTS = [{ 'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/', 'info_dict': { - 'id': '16025876', + 'id': 'ZqmUss3dQfEMGpauambPuH', + 'display_id': '16025876', 'ext': 'mp4', - 'description': 'md5:174571cb97083fd1d457d75c684f4e2b', - 'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png', 'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED', - 'duration': 455, - 'tags': [], + 'description': 'md5:dee48fe12bbae5c01fe6a053f7676da4', + 'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png', + 'duration': 455.979, + '_old_archive_ids': ['cellebrite 16025876'], }, }, { 'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/', 'info_dict': { - 'id': '29018255', + 'id': 'QV1U8a2yzcxigw7VFnqKyg', + 'display_id': '29018255', 'ext': 'mp4', - 'duration': 134, - 'tags': [], - 'description': 'md5:e9a3d124c7287b0b07bad2547061cacf', + 'title': 'How to Lawfully Collect the Maximum Amount of Data From Android Devices', + 'description': 'md5:0e943a9ac14c374d5d74faed634d773c', 'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png', - 'title': 'Android Extractions Explained', + 'duration': 134.315, + '_old_archive_ids': ['cellebrite 29018255'], }, }] - def _get_formats_and_subtitles(self, json_data, display_id): - formats = [{'url': url} for url in traverse_obj(json_data, ('mp4', ..., 'url')) or []] - subtitles = {} - - for url in traverse_obj(json_data, ('hls', ..., 'url')) or []: - fmt, sub = self._extract_m3u8_formats_and_subtitles( - url, display_id, ext='mp4', headers={'Referer': 'https://play.vidyard.com/'}) - formats.extend(fmt) - self._merge_subtitles(sub, target=subtitles) - - return formats, subtitles - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - player_uuid = self._search_regex( - r'<img\s[^>]*\bdata-uuid\s*=\s*"([^"\?]+)', webpage, 'player UUID') - json_data = self._download_json( - f'https://play.vidyard.com/player/{player_uuid}.json', display_id)['payload']['chapters'][0] + slug = self._match_id(url) + webpage = self._download_webpage(url, slug) + vidyard_url = next(VidyardIE._extract_embed_urls(url, webpage), None) + if not vidyard_url: + raise ExtractorError('No Vidyard video embeds found on page') + + video_id = url_basename(vidyard_url) + info = self._process_video_json(self._fetch_video_json(video_id)['chapters'][0], video_id) + if info.get('display_id'): + info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])] + if thumbnail := self._og_search_thumbnail(webpage, default=None): + info.setdefault('thumbnails', []).append({'url': thumbnail}) - formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], display_id) return { - 'id': str(json_data['videoId']), - 'title': json_data.get('name') or self._og_search_title(webpage), - 'formats': formats, - 'subtitles': subtitles, - 'description': json_data.get('description') or self._og_search_description(webpage), - 'duration': json_data.get('seconds'), - 'tags': json_data.get('tags'), - 'thumbnail': self._og_search_thumbnail(webpage), - 'http_headers': {'Referer': 'https://play.vidyard.com/'}, + 'description': self._og_search_description(webpage, default=None), + **info, } diff --git a/yt_dlp/extractor/chzzk.py b/yt_dlp/extractor/chzzk.py index 420fe05..e0b9980 100644 --- a/yt_dlp/extractor/chzzk.py +++ b/yt_dlp/extractor/chzzk.py @@ -36,7 +36,7 @@ class CHZZKLiveIE(InfoExtractor): def _real_extract(self, url): channel_id = self._match_id(url) live_detail = self._download_json( - f'https://api.chzzk.naver.com/service/v2/channels/{channel_id}/live-detail', channel_id, + f'https://api.chzzk.naver.com/service/v3/channels/{channel_id}/live-detail', channel_id, note='Downloading channel info', errnote='Unable to download channel info')['content'] if live_detail.get('status') == 'CLOSE': @@ -106,12 +106,45 @@ class CHZZKVideoIE(InfoExtractor): 'upload_date': '20231219', 'view_count': int, }, + 'skip': 'Replay video is expired', + }, { + # Manually uploaded video + 'url': 'https://chzzk.naver.com/video/1980', + 'info_dict': { + 'id': '1980', + 'ext': 'mp4', + 'title': '※시청주의※한번보면 잊기 힘든 영상', + 'channel': '라디유radiyu', + 'channel_id': '68f895c59a1043bc5019b5e08c83a5c5', + 'channel_is_verified': False, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 95, + 'timestamp': 1703102631.722, + 'upload_date': '20231220', + 'view_count': int, + }, + }, { + # Partner channel replay video + 'url': 'https://chzzk.naver.com/video/2458', + 'info_dict': { + 'id': '2458', + 'ext': 'mp4', + 'title': '첫 방송', + 'channel': '강지', + 'channel_id': 'b5ed5db484d04faf4d150aedd362f34b', + 'channel_is_verified': True, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 4433, + 'timestamp': 1703307460.214, + 'upload_date': '20231223', + 'view_count': int, + }, }] def _real_extract(self, url): video_id = self._match_id(url) video_meta = self._download_json( - f'https://api.chzzk.naver.com/service/v2/videos/{video_id}', video_id, + f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id, note='Downloading video info', errnote='Unable to download video info')['content'] formats, subtitles = self._extract_mpd_formats_and_subtitles( f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id, diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index fdf19c2..e36eac9 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -24,8 +24,9 @@ from ..utils import ( class DouyuBaseIE(InfoExtractor): def _download_cryptojs_md5(self, video_id): for url in [ + # XXX: Do NOT use cdn.bootcdn.net; ref: https://sansec.io/research/polyfill-supply-chain-attack 'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js', - 'https://cdn.bootcdn.net/ajax/libs/crypto-js/3.1.2/rollups/md5.js', + 'https://unpkg.com/cryptojslib@3.1.2/rollups/md5.js', ]: js_code = self._download_webpage( url, video_id, note='Downloading signing dependency', fatal=False) @@ -35,7 +36,8 @@ class DouyuBaseIE(InfoExtractor): raise ExtractorError('Unable to download JS dependency (crypto-js/md5)') def _get_cryptojs_md5(self, video_id): - return self.cache.load('douyu', 'crypto-js-md5') or self._download_cryptojs_md5(video_id) + return self.cache.load( + 'douyu', 'crypto-js-md5', min_ver='2024.07.04') or self._download_cryptojs_md5(video_id) def _calc_sign(self, sign_func, video_id, a): b = uuid.uuid4().hex diff --git a/yt_dlp/extractor/jiosaavn.py b/yt_dlp/extractor/jiosaavn.py index 542e41b..030fe68 100644 --- a/yt_dlp/extractor/jiosaavn.py +++ b/yt_dlp/extractor/jiosaavn.py @@ -158,7 +158,7 @@ class JioSaavnAlbumIE(JioSaavnBaseIE): class JioSaavnPlaylistIE(JioSaavnBaseIE): IE_NAME = 'jiosaavn:playlist' - _VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/s/playlist/(?:[^/?#]+/){2}(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/(?:s/playlist/(?:[^/?#]+/){2}|featured/[^/?#]+/)(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__', 'info_dict': { @@ -173,6 +173,13 @@ class JioSaavnPlaylistIE(JioSaavnBaseIE): 'title': 'Mood Hindi', }, 'playlist_mincount': 801, + }, { + 'url': 'https://www.jiosaavn.com/featured/taaza-tunes/Me5RridRfDk_', + 'info_dict': { + 'id': 'Me5RridRfDk_', + 'title': 'Taaza Tunes', + }, + 'playlist_mincount': 301, }] _PAGE_SIZE = 50 diff --git a/yt_dlp/extractor/swearnet.py b/yt_dlp/extractor/swearnet.py index b4835c5..2d6fb3e 100644 --- a/yt_dlp/extractor/swearnet.py +++ b/yt_dlp/extractor/swearnet.py @@ -1,55 +1,31 @@ -from .common import InfoExtractor -from ..utils import ExtractorError, int_or_none, traverse_obj +from .vidyard import VidyardBaseIE +from ..utils import ExtractorError, int_or_none, make_archive_id -class SwearnetEpisodeIE(InfoExtractor): +class SwearnetEpisodeIE(VidyardBaseIE): _VALID_URL = r'https?://www\.swearnet\.com/shows/(?P<id>[\w-]+)/seasons/(?P<season_num>\d+)/episodes/(?P<episode_num>\d+)' _TESTS = [{ 'url': 'https://www.swearnet.com/shows/gettin-learnt-with-ricky/seasons/1/episodes/1', 'info_dict': { - 'id': '232819', + 'id': 'wicK2EOzjOdxkUXGDIgcPw', + 'display_id': '232819', 'ext': 'mp4', 'episode_number': 1, 'episode': 'Episode 1', 'duration': 719, - 'description': 'md5:c48ef71440ce466284c07085cd7bd761', + 'description': r're:Are you drunk and high and craving a grilled cheese sandwich.+', 'season': 'Season 1', 'title': 'Episode 1 - Grilled Cheese Sammich', 'season_number': 1, - 'thumbnail': 'https://cdn.vidyard.com/thumbnails/232819/_RX04IKIq60a2V6rIRqq_Q_small.jpg', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/custom/0dd74f9b-388a-452e-b570-b407fb64435b_small.jpg', + 'tags': ['Getting Learnt with Ricky', 'drunk', 'grilled cheese', 'high'], + '_old_archive_ids': ['swearnetepisode 232819'], }, }] - def _get_formats_and_subtitle(self, video_source, video_id): - video_source = video_source or {} - formats, subtitles = [], {} - for key, value in video_source.items(): - if key == 'hls': - for video_hls in value: - fmts, subs = self._extract_m3u8_formats_and_subtitles(video_hls.get('url'), video_id) - formats.extend(fmts) - self._merge_subtitles(subs, target=subtitles) - else: - formats.extend({ - 'url': video_mp4.get('url'), - 'ext': 'mp4', - } for video_mp4 in value) - - return formats, subtitles - - def _get_direct_subtitle(self, caption_json): - subs = {} - for caption in caption_json: - subs.setdefault(caption.get('language') or 'und', []).append({ - 'url': caption.get('vttUrl'), - 'name': caption.get('name'), - }) - - return subs - def _real_extract(self, url): - display_id, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num') - webpage = self._download_webpage(url, display_id) + slug, season_number, episode_number = self._match_valid_url(url).group('id', 'season_num', 'episode_num') + webpage = self._download_webpage(url, slug) try: external_id = self._search_regex(r'externalid\s*=\s*"([^"]+)', webpage, 'externalid') @@ -58,22 +34,12 @@ class SwearnetEpisodeIE(InfoExtractor): self.raise_login_required() raise - json_data = self._download_json( - f'https://play.vidyard.com/player/{external_id}.json', display_id)['payload']['chapters'][0] - - formats, subtitles = self._get_formats_and_subtitle(json_data['sources'], display_id) - self._merge_subtitles(self._get_direct_subtitle(json_data.get('captions')), target=subtitles) + info = self._process_video_json(self._fetch_video_json(external_id)['chapters'][0], external_id) + if info.get('display_id'): + info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])] return { - 'id': str(json_data['videoId']), - 'title': json_data.get('name') or self._html_search_meta(['og:title', 'twitter:title'], webpage), - 'description': (json_data.get('description') - or self._html_search_meta(['og:description', 'twitter:description'], webpage)), - 'duration': int_or_none(json_data.get('seconds')), - 'formats': formats, - 'subtitles': subtitles, + **info, 'season_number': int_or_none(season_number), 'episode_number': int_or_none(episode_number), - 'thumbnails': [{'url': thumbnail_url} - for thumbnail_url in traverse_obj(json_data, ('thumbnailUrls', ...))], } diff --git a/yt_dlp/extractor/toggle.py b/yt_dlp/extractor/toggle.py index de2e03f..fbef7cc 100644 --- a/yt_dlp/extractor/toggle.py +++ b/yt_dlp/extractor/toggle.py @@ -28,35 +28,11 @@ class ToggleIE(InfoExtractor): 'skip_download': 'm3u8 download', }, }, { - 'note': 'DRM-protected video', 'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413', - 'info_dict': { - 'id': '341413', - 'ext': 'wvm', - 'title': 'Dug\'s Special Mission', - 'description': 'md5:e86c6f4458214905c1772398fabc93e0', - 'upload_date': '20150827', - 'timestamp': 1440644006, - }, - 'params': { - 'skip_download': 'DRM-protected wvm download', - }, + 'only_matching': True, }, { - # this also tests correct video id extraction - 'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay', 'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861', - 'info_dict': { - 'id': '332861', - 'ext': 'mp4', - 'title': '28th SEA Games (5 Show) - Episode 11', - 'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa', - 'upload_date': '20150605', - 'timestamp': 1433480166, - }, - 'params': { - 'skip_download': 'DRM-protected wvm download', - }, - 'skip': 'm3u8 links are geo-restricted', + 'only_matching': True, }, { 'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331', 'only_matching': True, diff --git a/yt_dlp/extractor/vidyard.py b/yt_dlp/extractor/vidyard.py new file mode 100644 index 0000000..20a54b1 --- /dev/null +++ b/yt_dlp/extractor/vidyard.py @@ -0,0 +1,426 @@ +import functools +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, + float_or_none, + int_or_none, + join_nonempty, + mimetype2ext, + parse_resolution, + str_or_none, + unescapeHTML, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class VidyardBaseIE(InfoExtractor): + _HEADERS = {'Referer': 'https://play.vidyard.com/'} + + def _get_formats_and_subtitles(self, sources, video_id): + formats, subtitles = [], {} + + def add_hls_fmts_and_subs(m3u8_url): + fmts, subs = self._extract_m3u8_formats_and_subtitles( + m3u8_url, video_id, 'mp4', m3u8_id='hls', headers=self._HEADERS, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + hls_list = isinstance(sources, dict) and sources.pop('hls', None) + if master_m3u8_url := traverse_obj( + hls_list, (lambda _, v: v['profile'] == 'auto', 'url', {url_or_none}, any)): + add_hls_fmts_and_subs(master_m3u8_url) + if not formats: # These are duplicate and unnecesary requests if we got 'auto' hls fmts + for variant_m3u8_url in traverse_obj(hls_list, (..., 'url', {url_or_none})): + add_hls_fmts_and_subs(variant_m3u8_url) + + for source_type, source_list in traverse_obj(sources, ({dict.items}, ...)): + for source in traverse_obj(source_list, lambda _, v: url_or_none(v['url'])): + profile = source.get('profile') + formats.append({ + 'url': source['url'], + 'ext': mimetype2ext(source.get('mimeType'), default=None), + 'format_id': join_nonempty('http', source_type, profile), + **parse_resolution(profile), + }) + + self._remove_duplicate_formats(formats) + return formats, subtitles + + def _get_direct_subtitles(self, caption_json): + subs = {} + for caption in traverse_obj(caption_json, lambda _, v: url_or_none(v['vttUrl'])): + subs.setdefault(caption.get('language') or 'und', []).append({ + 'url': caption['vttUrl'], + 'name': caption.get('name'), + }) + + return subs + + def _fetch_video_json(self, video_id): + return self._download_json( + f'https://play.vidyard.com/player/{video_id}.json', video_id)['payload'] + + def _process_video_json(self, json_data, video_id): + formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], video_id) + self._merge_subtitles(self._get_direct_subtitles(json_data.get('captions')), target=subtitles) + + return { + **traverse_obj(json_data, { + 'id': ('facadeUuid', {str}), + 'display_id': ('videoId', {int}, {str_or_none}), + 'title': ('name', {str}), + 'description': ('description', {str}, {unescapeHTML}, {lambda x: x or None}), + 'duration': (( + ('milliseconds', {functools.partial(float_or_none, scale=1000)}), + ('seconds', {int_or_none})), any), + 'thumbnails': ('thumbnailUrls', ('small', 'normal'), {'url': {url_or_none}}), + 'tags': ('tags', ..., 'name', {str}), + }), + 'formats': formats, + 'subtitles': subtitles, + 'http_headers': self._HEADERS, + } + + +class VidyardIE(VidyardBaseIE): + _VALID_URL = [ + r'https?://[\w-]+(?:\.hubs)?\.vidyard\.com/watch/(?P<id>[\w-]+)', + r'https?://(?:embed|share)\.vidyard\.com/share/(?P<id>[\w-]+)', + r'https?://play\.vidyard\.com/(?:player/)?(?P<id>[\w-]+)', + ] + _EMBED_REGEX = [r'<iframe[^>]* src=["\'](?P<url>(?:https?:)?//play\.vidyard\.com/[\w-]+)'] + _TESTS = [{ + 'url': 'https://vyexample03.hubs.vidyard.com/watch/oTDMPlUv--51Th455G5u7Q', + 'info_dict': { + 'id': 'oTDMPlUv--51Th455G5u7Q', + 'display_id': '50347', + 'ext': 'mp4', + 'title': 'Homepage Video', + 'description': 'Look I changed the description.', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg', + 'duration': 99, + 'tags': ['these', 'are', 'all', 'tags'], + }, + }, { + 'url': 'https://share.vidyard.com/watch/PaQzDAT1h8JqB8ivEu2j6Y?', + 'info_dict': { + 'id': 'PaQzDAT1h8JqB8ivEu2j6Y', + 'display_id': '9281024', + 'ext': 'mp4', + 'title': 'Inline Embed', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif', + 'duration': 41.186, + }, + }, { + 'url': 'https://embed.vidyard.com/share/oTDMPlUv--51Th455G5u7Q', + 'info_dict': { + 'id': 'oTDMPlUv--51Th455G5u7Q', + 'display_id': '50347', + 'ext': 'mp4', + 'title': 'Homepage Video', + 'description': 'Look I changed the description.', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg', + 'duration': 99, + 'tags': ['these', 'are', 'all', 'tags'], + }, + }, { + # First video from playlist below + 'url': 'https://embed.vidyard.com/share/SyStyHtYujcBHe5PkZc5DL', + 'info_dict': { + 'id': 'SyStyHtYujcBHe5PkZc5DL', + 'display_id': '41974005', + 'ext': 'mp4', + 'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track', + 'description': r're:In this video, you will learn how to prepare the frame.+', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png', + 'duration': 258.666, + }, + }, { + # Playlist + 'url': 'https://thelink.hubs.vidyard.com/watch/pwu7pCYWSwAnPxs8nDoFrE', + 'info_dict': { + 'id': 'pwu7pCYWSwAnPxs8nDoFrE', + 'title': 'PLAYLIST - Palm Beach Shutters- Bi-Fold Track System Installation', + 'entries': [{ + 'id': 'SyStyHtYujcBHe5PkZc5DL', + 'display_id': '41974005', + 'ext': 'mp4', + 'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png', + 'duration': 258.666, + }, { + 'id': '1Fw4B84jZTXLXWqkE71RiM', + 'display_id': '5861113', + 'ext': 'mp4', + 'title': 'Palm Beach - Bi-Fold Track System "Frame Installation"', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861113/29CJ54s5g1_aP38zkKLHew_small.jpg', + 'duration': 167.858, + }, { + 'id': 'DqP3wBvLXSpxrcqpT5kEeo', + 'display_id': '41976334', + 'ext': 'mp4', + 'title': 'Install the Track for Palm Beach Polysatin Shutters With BiFold Track', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861090/RwG2VaTylUa6KhSTED1r1Q_small.png', + 'duration': 94.229, + }, { + 'id': 'opfybfxpzQArxqtQYB6oBU', + 'display_id': '41976364', + 'ext': 'mp4', + 'title': 'Install the Panel for Palm Beach Polysatin Shutters With BiFold Track', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860926/JIOaJR08dM4QgXi_iQ2zGA_small.png', + 'duration': 191.467, + }, { + 'id': 'rWrXvkbTNNaNqD6189HJya', + 'display_id': '41976382', + 'ext': 'mp4', + 'title': 'Adjust the Panels for Palm Beach Polysatin Shutters With BiFold Track', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860687/CwHxBv4UudAhOh43FVB4tw_small.png', + 'duration': 138.155, + }, { + 'id': 'eYPTB521MZ9TPEArSethQ5', + 'display_id': '41976409', + 'ext': 'mp4', + 'title': 'Assemble and Install the Valance for Palm Beach Polysatin Shutters With BiFold Track', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861425/0y68qlMU4O5VKU7bJ8i_AA_small.png', + 'duration': 148.224, + }], + }, + 'playlist_count': 6, + }, { + # Non hubs.vidyard.com playlist + 'url': 'https://salesforce.vidyard.com/watch/d4vqPjs7Q5EzVEis5QT3jd', + 'info_dict': { + 'id': 'd4vqPjs7Q5EzVEis5QT3jd', + 'title': 'How To: Service Cloud: Import External Content in Lightning Knowledge', + 'entries': [{ + 'id': 'mcjDpSZir2iSttbvFkx6Rv', + 'display_id': '29479036', + 'ext': 'mp4', + 'title': 'Welcome to this Expert Coaching Series', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/ouyQi9WuwyiOupChUWNmjQ/7170d3485ba602e012df05_small.jpg', + 'duration': 38.205, + }, { + 'id': '84bPYwpg243G6xYEfJdYw9', + 'display_id': '21820704', + 'ext': 'mp4', + 'title': 'Chapter 1 - Title + Agenda', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/HFPN0ZgQq4Ow8BghGcQSow/bfaa30123c8f6601e7d7f2_small.jpg', + 'duration': 98.016, + }, { + 'id': 'nP17fMuvA66buVHUrzqjTi', + 'display_id': '21820707', + 'ext': 'mp4', + 'title': 'Chapter 2 - Import Options', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/rGRIF5nFjPI9OOA2qJ_Dbg/86a8d02bfec9a566845dd4_small.jpg', + 'duration': 199.136, + }, { + 'id': 'm54EcwXdpA5gDBH5rgCYoV', + 'display_id': '21820710', + 'ext': 'mp4', + 'title': 'Chapter 3 - Importing Article Translations', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/IVX4XR8zpSsiNIHx45kz-A/1ccbf8a29a33856d06b3ed_small.jpg', + 'duration': 184.352, + }, { + 'id': 'j4nzS42oq4hE9oRV73w3eQ', + 'display_id': '21820716', + 'ext': 'mp4', + 'title': 'Chapter 4 - Best Practices', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/BtrRrQpRDLbA4AT95YQyog/1f1e6b8e7fdc3fa95ec8d3_small.jpg', + 'duration': 296.960, + }, { + 'id': 'y28PYfW5pftvers9PXzisC', + 'display_id': '21820727', + 'ext': 'mp4', + 'title': 'Chapter 5 - Migration Steps', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/K2CdQOXDfLcrVTF60r0bdw/a09239ada28b6ffce12b1f_small.jpg', + 'duration': 620.640, + }, { + 'id': 'YWU1eQxYvhj29SjYoPw5jH', + 'display_id': '21820733', + 'ext': 'mp4', + 'title': 'Chapter 6 - Demo', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/rsmhP-cO8dAa8ilvFGCX0g/7911ef415167cd14032068_small.jpg', + 'duration': 631.456, + }, { + 'id': 'nmEvVqpwdJUgb74zKsLGxn', + 'display_id': '29479037', + 'ext': 'mp4', + 'title': 'Schedule Your Follow-Up', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/Rtwc7X4PEkF4Ae5kHi-Jvw/174ebed3f34227b1ffa1d0_small.jpg', + 'duration': 33.608, + }], + }, + 'playlist_count': 8, + }, { + # URL of iframe embed src + 'url': 'https://play.vidyard.com/iDqTwWGrd36vaLuaCY3nTs.html', + 'info_dict': { + 'id': 'iDqTwWGrd36vaLuaCY3nTs', + 'display_id': '9281009', + 'ext': 'mp4', + 'title': 'Lightbox Embed', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif', + 'duration': 39.035, + }, + }, { + # Player JSON URL + 'url': 'https://play.vidyard.com/player/7GAApnNNbcZZ46k6JqJQSh.json?disable_analytics=0', + 'info_dict': { + 'id': '7GAApnNNbcZZ46k6JqJQSh', + 'display_id': '820026', + 'ext': 'mp4', + 'title': 'The Art of Storytelling: How to Deliver Your Brand Story with Content & Social', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/MhbE-5sEFQu4x3fI6FkNlA/41eb5717c557cd19456910_small.jpg', + 'duration': 2153.013, + 'tags': ['Summit2017'], + }, + }, { + 'url': 'http://share.vidyard.com/share/diYeo6YR2yiGgL8odvS8Ri', + 'only_matching': True, + }, { + 'url': 'https://play.vidyard.com/FFlz3ZpxhIfKQ1fd9DAryA', + 'only_matching': True, + }, { + 'url': 'https://play.vidyard.com/qhMAu5A76GZVrFzOPgSf9A/type/standalone', + 'only_matching': True, + }] + _WEBPAGE_TESTS = [{ + # URL containing inline/lightbox embedded video + 'url': 'https://resources.altium.com/p/2-the-extreme-importance-of-pc-board-stack-up', + 'info_dict': { + 'id': 'GDx1oXrFWj4XHbipfoXaMn', + 'display_id': '3225198', + 'ext': 'mp4', + 'title': 'The Extreme Importance of PC Board Stack Up', + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/73_Q3_hBexWX7Og1sae6cg/9998fa4faec921439e2c04_small.jpg', + 'duration': 3422.742, + }, + }, { + # <script ... id="vidyard_embed_code_DXx2sW4WaLA6hTdGFz7ja8" src="//play.vidyard.com/DXx2sW4WaLA6hTdGFz7ja8.js? + 'url': 'http://videos.vivint.com/watch/DXx2sW4WaLA6hTdGFz7ja8', + 'info_dict': { + 'id': 'DXx2sW4WaLA6hTdGFz7ja8', + 'display_id': '2746529', + 'ext': 'mp4', + 'title': 'How To Powercycle the Smart Hub Panel', + 'duration': 30.613, + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/_-6cw8xQUJ3qiCs_JENc_A/b21d7a5e47967f49399d30_small.jpg', + }, + }, { + # <script id="vidyard_embed_code_MIBHhiLVTxga7wqLsuoDjQ" src="//embed.vidyard.com/embed/MIBHhiLVTxga7wqLsuoDjQ/inline?v=2.1"> + 'url': 'https://www.babypips.com/learn/forex/introduction-to-metatrader4', + 'info_dict': { + 'id': 'MIBHhiLVTxga7wqLsuoDjQ', + 'display_id': '20291', + 'ext': 'mp4', + 'title': 'Lesson 1 - Opening an MT4 Account', + 'description': 'Never heard of MetaTrader4? Here\'s the 411 on the popular trading platform!', + 'duration': 168, + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/20291/IM-G2WXQR9VBLl2Cmzvftg_small.jpg', + }, + }, { + # <iframe ... src="//play.vidyard.com/d61w8EQoZv1LDuPxDkQP2Q/type/background?preview=1" + 'url': 'https://www.avaya.com/en/', + 'info_dict': { + # These values come from the generic extractor and don't matter + 'id': str, + 'title': str, + 'age_limit': 0, + 'upload_date': str, + 'description': str, + 'thumbnail': str, + 'timestamp': float, + }, + 'playlist': [{ + 'info_dict': { + 'id': 'd61w8EQoZv1LDuPxDkQP2Q', + 'display_id': '42456529', + 'ext': 'mp4', + 'title': 'GettyImages-1027', + 'duration': 6.0, + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/42061563/p6bY08d2N4e4IDz-7J4_wkgsPq3-qgcx_small.jpg', + }, + }, { + 'info_dict': { + 'id': 'VAsYDi7eiqZRbHodUA2meC', + 'display_id': '42456569', + 'ext': 'mp4', + 'title': 'GettyImages-1325598833', + 'duration': 6.083, + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/42052358/y3qrbDpn_2quWr_5XBi7yzS3UvEI__ZM_small.jpg', + }, + }], + 'playlist_count': 2, + }, { + # <div class="vidyard-player-embed" data-uuid="vpCWTVHw3qrciLtVY94YkS" + 'url': 'https://www.gogoair.com/', + 'info_dict': { + # These values come from the generic extractor and don't matter + 'id': str, + 'title': str, + 'description': str, + 'age_limit': 0, + }, + 'playlist': [{ + 'info_dict': { + 'id': 'vpCWTVHw3qrciLtVY94YkS', + 'display_id': '40780699', + 'ext': 'mp4', + 'title': 'Upgrade to AVANCE 100% worth it - Jason Talley, Owner and Pilot, Testimonial', + 'description': 'md5:f609824839439a51990cef55ffc472aa', + 'duration': 70.737, + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/40780699/KzjfYZz5MZl2gHF_e-4i2c6ib1cLDweQ_small.jpg', + }, + }, { + 'info_dict': { + 'id': 'xAmV9AsLbnitCw35paLBD8', + 'display_id': '31130867', + 'ext': 'mp4', + 'title': 'Brad Keselowski goes faster with Gogo AVANCE inflight Wi-Fi', + 'duration': 132.565, + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/31130867/HknyDtLdm2Eih9JZ4A5XLjhfBX_6HRw5_small.jpg', + }, + }, { + 'info_dict': { + 'id': 'RkkrFRNxfP79nwCQavecpF', + 'display_id': '39009815', + 'ext': 'mp4', + 'title': 'Live Demo of Gogo Galileo', + 'description': 'md5:e2df497236f4e12c3fef8b392b5f23e0', + 'duration': 112.128, + 'thumbnail': 'https://cdn.vidyard.com/thumbnails/38144873/CWLlxfUbJ4Gh0ThbUum89IsEM4yupzMb_small.jpg', + }, + }], + 'playlist_count': 3, + }] + + @classmethod + def _extract_embed_urls(cls, url, webpage): + # Handle protocol-less embed URLs + for embed_url in super()._extract_embed_urls(url, webpage): + if embed_url.startswith('//'): + embed_url = f'https:{embed_url}' + yield embed_url + + # Extract inline/lightbox embeds + for embed_element in re.findall( + r'(<(?:img|div)[^>]* class=(["\'])(?:[^>"\']* )?vidyard-player-embed(?: [^>"\']*)?\2[^>]+>)', webpage): + if video_id := extract_attributes(embed_element[0]).get('data-uuid'): + yield f'https://play.vidyard.com/{video_id}' + + for embed_id in re.findall(r'<script[^>]* id=["\']vidyard_embed_code_([\w-]+)["\']', webpage): + yield f'https://play.vidyard.com/{embed_id}' + + def _real_extract(self, url): + video_id = self._match_id(url) + video_json = self._fetch_video_json(video_id) + + if len(video_json['chapters']) == 1: + return self._process_video_json(video_json['chapters'][0], video_id) + + return self.playlist_result( + [self._process_video_json(chapter, video_id) for chapter in video_json['chapters']], + str(video_json['playerUuid']), video_json.get('name')) diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index a4ab7e2..18eb084 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -1,6 +1,7 @@ import base64 import functools import itertools +import json import re import urllib.parse @@ -14,6 +15,7 @@ from ..utils import ( determine_ext, get_element_by_class, int_or_none, + join_nonempty, js_to_json, merge_dicts, parse_filesize, @@ -84,29 +86,23 @@ class VimeoBaseInfoExtractor(InfoExtractor): expected=True) return password - def _verify_video_password(self, url, video_id, password, token, vuid): - if url.startswith('http://'): - # vimeo only supports https now, but the user can give an http url - url = url.replace('http://', 'https://') - self._set_vimeo_cookie('vuid', vuid) - return self._download_webpage( - url + '/password', video_id, 'Verifying the password', - 'Wrong password', data=urlencode_postdata({ - 'password': password, - 'token': token, - }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'Referer': url, - }) - - def _extract_xsrft_and_vuid(self, webpage): - xsrft = self._search_regex( - r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)', - webpage, 'login token', group='xsrft') - vuid = self._search_regex( - r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1', - webpage, 'vuid', group='vuid') - return xsrft, vuid + def _verify_video_password(self, video_id, password, token): + url = f'https://vimeo.com/{video_id}' + try: + return self._download_webpage( + f'{url}/password', video_id, + 'Submitting video password', data=json.dumps({ + 'password': password, + 'token': token, + }, separators=(',', ':')).encode(), headers={ + 'Accept': '*/*', + 'Content-Type': 'application/json', + 'Referer': url, + }, impersonate=True) + except ExtractorError as error: + if isinstance(error.cause, HTTPError) and error.cause.status == 418: + raise ExtractorError('Wrong password', expected=True) + raise def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): vimeo_config = self._search_regex( @@ -745,21 +741,34 @@ class VimeoIE(VimeoBaseInfoExtractor): raise ExtractorError('Wrong video password', expected=True) return checked - def _extract_from_api(self, video_id, unlisted_hash=None): - token = self._download_json( - 'https://vimeo.com/_rv/jwt', video_id, headers={ - 'X-Requested-With': 'XMLHttpRequest', - })['token'] - api_url = 'https://api.vimeo.com/videos/' + video_id - if unlisted_hash: - api_url += ':' + unlisted_hash - video = self._download_json( - api_url, video_id, headers={ - 'Authorization': 'jwt ' + token, + def _call_videos_api(self, video_id, jwt_token, unlisted_hash=None): + return self._download_json( + join_nonempty(f'https://api.vimeo.com/videos/{video_id}', unlisted_hash, delim=':'), + video_id, 'Downloading API JSON', headers={ + 'Authorization': f'jwt {jwt_token}', 'Accept': 'application/json', }, query={ 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays', }) + + def _extract_from_api(self, video_id, unlisted_hash=None): + viewer = self._download_json( + 'https://vimeo.com/_next/viewer', video_id, 'Downloading viewer info') + + for retry in (False, True): + try: + video = self._call_videos_api(video_id, viewer['jwt'], unlisted_hash) + except ExtractorError as e: + if (not retry and isinstance(e.cause, HTTPError) and e.cause.status == 400 + and 'password' in traverse_obj( + e.cause.response.read(), + ({bytes.decode}, {json.loads}, 'invalid_parameters', ..., 'field'), + )): + self._verify_video_password( + video_id, self._get_video_password(), viewer['xsrft']) + continue + raise + info = self._parse_config(self._download_json( video['config_url'], video_id), video_id) get_timestamp = lambda x: parse_iso8601(video.get(x + '_time')) @@ -865,12 +874,6 @@ class VimeoIE(VimeoBaseInfoExtractor): redirect_url, video_id, headers) return self._parse_config(config, video_id) - if re.search(r'<form[^>]+?id="pw_form"', webpage): - video_password = self._get_video_password() - token, vuid = self._extract_xsrft_and_vuid(webpage) - webpage = self._verify_video_password( - redirect_url, video_id, video_password, token, vuid) - vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None) if vimeo_config: seed_status = vimeo_config.get('seed_status') or {} @@ -1290,9 +1293,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): video_password = self._get_video_password() viewer = self._download_json( 'https://vimeo.com/_rv/viewer', video_id) - webpage = self._verify_video_password( - 'https://vimeo.com/' + video_id, video_id, - video_password, viewer['xsrft'], viewer['vuid']) + webpage = self._verify_video_password(video_id, video_password, viewer['xsrft']) clip_page_config = self._parse_json(self._search_regex( r'window\.vimeo\.clip_page_config\s*=\s*({.+?});', webpage, 'clip page config'), video_id) diff --git a/yt_dlp/extractor/vtv.py b/yt_dlp/extractor/vtv.py new file mode 100644 index 0000000..97134ee --- /dev/null +++ b/yt_dlp/extractor/vtv.py @@ -0,0 +1,108 @@ +from .common import InfoExtractor +from ..utils import extract_attributes, get_element_html_by_class, remove_start + + +class VTVGoIE(InfoExtractor): + _VALID_URL = [ + r'https?://(?:www\.)?vtvgo\.vn/(kho-video|tin-tuc)/[\w.-]*?(?P<id>\d+)(?:\.[a-z]+|/)?(?:$|[?#])', + r'https?://(?:www\.)?vtvgo\.vn/digital/detail\.php\?(?:[^#]+&)?content_id=(?P<id>\d+)', + ] + _TESTS = [{ + 'url': 'https://vtvgo.vn/kho-video/bep-vtv-vit-chao-rieng-so-24-888456.html', + 'info_dict': { + 'id': '888456', + 'ext': 'mp4', + 'title': 'Bếp VTV | Vịt chao riềng | Số 24', + 'description': 'md5:2b4e93ec2b954304170d32be288ce2c8', + 'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20230201/VIT-CHAO-RIENG_VTV_638108894672812459.jpg', + }, + }, { + 'url': 'https://vtvgo.vn/tin-tuc/hot-search-1-zlife-khong-ngo-toi-phai-khong-862074', + 'info_dict': { + 'id': '862074', + 'ext': 'mp4', + 'title': 'Hot Search #1 | Zlife | Không ngờ tới phải không? ', + 'description': 'md5:e967d0e2efbbebbee8814a55799b4d0f', + 'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20220504/6b9a8552-e71c-46ce-bc9d-50c9bb506f9c.jpeg', + }, + }, { + 'url': 'https://vtvgo.vn/kho-video/918311.html', + 'info_dict': { + 'id': '918311', + 'title': 'Cà phê sáng | 05/02/2024 | Tái hiện hình ảnh Hà Nội xưa tại ngôi nhà di sản', + 'ext': 'mp4', + 'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240205/0506_ca_phe_sang_638427226021318322.jpg', + 'description': 'md5:b121c67948f1ce58e6a036042fc14c1b', + }, + }, { + 'url': 'https://vtvgo.vn/digital/detail.php?digital_id=168&content_id=918634', + 'info_dict': { + 'id': '918634', + 'ext': 'mp4', + 'title': 'Gặp nhau cuối năm | Táo quân 2024', + 'description': 'md5:a1c221e78e5954d29d49b2a11c20513c', + 'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240210/d0f73369-8f03-4108-9edd-83d4bc3997b2.png', + }, + }, { + 'url': 'https://vtvgo.vn/digital/detail.php?content_id=919358', + 'info_dict': { + 'id': '919358', + 'ext': 'mp4', + 'title': 'Chúng ta của 8 năm sau | Tập 45 | Dương có bằng chứng, nhân chứng vạch mặt ông Khiêm', + 'description': 'md5:16ff5208cac6585137f554472a4677f3', + 'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240221/550deff9-7736-4a0e-8b5d-33274d97cd7d.jpg', + }, + }, { + 'url': 'https://vtvgo.vn/kho-video/888456', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + m3u8_url = self._search_regex( + r'(?:var\s+link\s*=\s*|addPlayer\()["\'](https://[^"\']+/index\.m3u8)["\']', webpage, 'm3u8 url') + return { + 'id': video_id, + 'title': self._og_search_title(webpage, default=None), + 'description': self._og_search_description(webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'), + } + + +class VTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vtv\.vn/video/[\w-]*?(?P<id>\d+)\.htm' + _TESTS = [{ + 'url': 'https://vtv.vn/video/thoi-su-20h-vtv1-12-6-2024-680411.htm', + 'info_dict': { + 'id': '680411', + 'ext': 'mp4', + 'title': 'Thời sự 20h VTV1 - 12/6/2024 - Video đã phát trên VTV1 | VTV.VN', + 'thumbnail': 'https://cdn-images.vtv.vn/zoom/600_315/66349b6076cb4dee98746cf1/2024/06/12/thumb/1206-ts-20h-02929741475480320806760.mp4/thumb0.jpg', + }, + }, { + 'url': 'https://vtv.vn/video/zlife-1-khong-ngo-toi-phai-khong-vtv24-560248.htm', + 'info_dict': { + 'id': '560248', + 'ext': 'mp4', + 'title': 'ZLife #1: Không ngờ tới phải không? | VTV24 - Video đã phát trên VTV-NEWS | VTV.VN', + 'description': 'Ai đứng sau vụ việc thay đổi ảnh đại diện trên các trang mạng xã hội của VTV Digital tối 2/5?', + 'thumbnail': 'https://video-thumbs.mediacdn.vn/zoom/600_315/vtv/2022/5/13/t67s6btf3ji-16524555726231894427334.jpg', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + data_vid = extract_attributes(get_element_html_by_class( + 'VCSortableInPreviewMode', get_element_html_by_class( + 'video-highlight-box', webpage)))['data-vid'] + m3u8_url = f'https://cdn-videos.vtv.vn/{remove_start(data_vid, "vtv.mediacdn.vn/")}/master.m3u8' + return { + 'id': video_id, + 'title': self._og_search_title(webpage, default=None), + 'description': self._og_search_description(webpage, default=None), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'), + } diff --git a/yt_dlp/extractor/yle_areena.py b/yt_dlp/extractor/yle_areena.py index 796f7f3..ef9e968 100644 --- a/yt_dlp/extractor/yle_areena.py +++ b/yt_dlp/extractor/yle_areena.py @@ -11,6 +11,7 @@ from ..utils import ( class YleAreenaIE(InfoExtractor): _VALID_URL = r'https?://areena\.yle\.fi/(?P<id>[\d-]+)' + _GEO_COUNTRIES = ['FI'] _TESTS = [ { 'url': 'https://areena.yle.fi/1-4371942', @@ -19,7 +20,7 @@ class YleAreenaIE(InfoExtractor): 'id': '0_a3tjk92c', 'ext': 'mp4', 'title': 'Pouchit', - 'description': 'md5:d487309c3abbe5650265bbd1742d2f82', + 'description': 'md5:01071d7056ceec375f63960f90c35366', 'series': 'Modernit miehet', 'season': 'Season 1', 'season_number': 1, @@ -87,8 +88,8 @@ class YleAreenaIE(InfoExtractor): }) # Example title: 'K1, J2: Pouchit | Modernit miehet' - series, season_number, episode_number, episode = self._search_regex( - r'K(?P<season_no>[\d]+),\s*J(?P<episode_no>[\d]+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)', + season_number, episode_number, episode, series = self._search_regex( + r'K(?P<season_no>\d+),\s*J(?P<episode_no>\d+):?\s*\b(?P<episode>[^|]+)\s*|\s*(?P<series>.+)', info.get('title') or '', 'episode metadata', group=('season_no', 'episode_no', 'episode', 'series'), default=(None, None, None, None)) description = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'description', 'fin'), expected_type=str) @@ -110,10 +111,12 @@ class YleAreenaIE(InfoExtractor): 'ie_key': KalturaIE.ie_key(), } else: + formats, subs = self._extract_m3u8_formats_and_subtitles( + video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls') + self._merge_subtitles(subs, target=subtitles) info_dict = { 'id': video_id, - 'formats': self._extract_m3u8_formats( - video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls'), + 'formats': formats, } return { @@ -129,6 +132,6 @@ class YleAreenaIE(InfoExtractor): or int_or_none(episode_number)), 'thumbnails': traverse_obj(info, ('thumbnails', ..., {'url': 'url'})), 'age_limit': traverse_obj(video_data, ('data', 'ongoing_ondemand', 'content_rating', 'age_restriction'), expected_type=int_or_none), - 'subtitles': subtitles, + 'subtitles': subtitles or None, 'release_date': unified_strdate(traverse_obj(video_data, ('data', 'ongoing_ondemand', 'start_time'), expected_type=str)), } diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index b5e1e29..b582b7d 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -5026,7 +5026,7 @@ MEDIA_EXTENSIONS = Namespace( common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'), video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'), common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'), - audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'), + audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'), thumbnails=('jpg', 'png', 'webp'), storyboards=('mhtml', ), subtitles=('srt', 'vtt', 'ass', 'lrc'), @@ -5059,27 +5059,53 @@ class _UnsafeExtensionError(Exception): # video *MEDIA_EXTENSIONS.video, - 'avif', + 'asx', 'ismv', + 'm2t', 'm2ts', + 'm2v', 'm4s', 'mng', + 'mp2v', + 'mp4v', + 'mpe', 'mpeg', + 'mpeg1', + 'mpeg2', + 'mpeg4', + 'mxf', + 'ogm', 'qt', + 'rm', 'swf', 'ts', + 'vob', 'vp9', - 'wvm', # audio *MEDIA_EXTENSIONS.audio, + '3ga', + 'ac3', + 'adts', + 'aif', + 'au', + 'dts', 'isma', + 'it', 'mid', + 'mod', 'mpga', + 'mp1', + 'mp2', + 'mp4a', + 'mpa', 'ra', + 'shn', + 'xm', # image *MEDIA_EXTENSIONS.thumbnails, + 'avif', 'bmp', 'gif', 'heic', @@ -5089,6 +5115,7 @@ class _UnsafeExtensionError(Exception): 'jxl', 'svg', 'tif', + 'tiff', 'wbmp', # subtitle @@ -5096,11 +5123,16 @@ class _UnsafeExtensionError(Exception): 'dfxp', 'fs', 'ismt', + 'json3', 'sami', 'scc', + 'srv1', + 'srv2', + 'srv3', 'ssa', 'tt', 'ttml', + 'xml', # others *MEDIA_EXTENSIONS.manifests, @@ -5111,7 +5143,6 @@ class _UnsafeExtensionError(Exception): 'sbv', 'url', 'webloc', - 'xml', ]) def __init__(self, extension, /): @@ -5120,6 +5151,9 @@ class _UnsafeExtensionError(Exception): @classmethod def sanitize_extension(cls, extension, /, *, prepend=False): + if extension is None: + return None + if '/' in extension or '\\' in extension: raise cls(extension) diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 7581a3b..323b54c 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.07.02' +__version__ = '2024.07.07' -RELEASE_GIT_HEAD = '93d33cb29af9e2e84369ac43589d50ce8e0160ef' +RELEASE_GIT_HEAD = 'b337d2989ce0614651d363383f6f743d977248ef' VARIANT = None @@ -12,4 +12,4 @@ CHANNEL = 'stable' ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.07.02' +_pkg_version = '2024.07.07' |