Merging upstream version 2024.07.01.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-05 09:06:11 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-05 09:06:11 +0000
commit: fd5a06560caab95c71a2e2e805efa8d0f3a696a0 (patch)
tree: e1c600b8612bc4b301e2f51b875fcd835c5008cc /yt_dlp/extractor/vimeo.py
parent: Releasing progress-linux version 2024.05.27-1~progress7.99u1. (diff)
download: yt-dlp-fd5a06560caab95c71a2e2e805efa8d0f3a696a0.tar.xz yt-dlp-fd5a06560caab95c71a2e2e805efa8d0f3a696a0.zip
1 file changed, 49 insertions, 37 deletions
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index ac96ade..a4ab7e2 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -2,9 +2,9 @@ import base64
 import functools
 import itertools
 import re
+import urllib.parse
 
 from .common import InfoExtractor
-from ..compat import compat_str, compat_urlparse
 from ..networking import HEADRequest, Request
 from ..networking.exceptions import HTTPError
 from ..utils import (
@@ -141,7 +141,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
                 continue
             formats.append({
                 'url': video_url,
-                'format_id': 'http-%s' % f.get('quality'),
+                'format_id': 'http-{}'.format(f.get('quality')),
                 'source_preference': 10,
                 'width': int_or_none(f.get('width')),
                 'height': int_or_none(f.get('height')),
@@ -156,19 +156,19 @@ class VimeoBaseInfoExtractor(InfoExtractor):
                 manifest_url = cdn_data.get('url')
                 if not manifest_url:
                     continue
-                format_id = '%s-%s' % (files_type, cdn_name)
+                format_id = f'{files_type}-{cdn_name}'
                 sep_manifest_urls = []
                 if re.search(sep_pattern, manifest_url):
                     for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
                         sep_manifest_urls.append((format_id + suffix, re.sub(
-                            sep_pattern, '/%s/' % repl, manifest_url)))
+                            sep_pattern, f'/{repl}/', manifest_url)))
                 else:
                     sep_manifest_urls = [(format_id, manifest_url)]
                 for f_id, m_url in sep_manifest_urls:
                     if files_type == 'hls':
                         fmts, subs = self._extract_m3u8_formats_and_subtitles(
                             m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id,
-                            note='Downloading %s m3u8 information' % cdn_name,
+                            note=f'Downloading {cdn_name} m3u8 information',
                             fatal=False)
                         formats.extend(fmts)
                         self._merge_subtitles(subs, target=subtitles)
@@ -179,7 +179,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
                                 m_url = real_m_url
                         fmts, subs = self._extract_mpd_formats_and_subtitles(
                             m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
-                            'Downloading %s MPD information' % cdn_name,
+                            f'Downloading {cdn_name} MPD information',
                             fatal=False)
                         formats.extend(fmts)
                         self._merge_subtitles(subs, target=subtitles)
@@ -256,10 +256,10 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         download_url = try_get(source_file, lambda x: x['download_url'])
         if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
             source_name = source_file.get('public_name', 'Original')
-            if self._is_valid_url(download_url, video_id, '%s video' % source_name):
+            if self._is_valid_url(download_url, video_id, f'{source_name} video'):
                 ext = (try_get(
                     source_file, lambda x: x['extension'],
-                    compat_str) or determine_ext(
+                    str) or determine_ext(
                     download_url, None) or 'mp4').lower()
                 return {
                     'url': download_url,
@@ -275,7 +275,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
             'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {}
         if not jwt_response.get('jwt'):
             return
-        headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'}
+        headers = {'Authorization': 'jwt {}'.format(jwt_response['jwt']), 'Accept': 'application/json'}
         original_response = self._download_json(
             f'https://api.vimeo.com/videos/{video_id}', video_id,
             headers=headers, fatal=False, expected_status=(403, 404)) or {}
@@ -361,7 +361,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'params': {
                 'format': 'best[protocol=https]',
             },
-            'skip': 'No longer available'
+            'skip': 'No longer available',
         },
         {
             'url': 'http://player.vimeo.com/video/54469442',
@@ -739,7 +739,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'Content-Type': 'application/x-www-form-urlencoded',
         })
         checked = self._download_json(
-            f'{compat_urlparse.urlsplit(url)._replace(query=None).geturl()}/check-password',
+            f'{urllib.parse.urlsplit(url)._replace(query=None).geturl()}/check-password',
             video_id, 'Verifying the password', data=data, headers=headers)
         if checked is False:
             raise ExtractorError('Wrong video password', expected=True)
@@ -748,7 +748,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
     def _extract_from_api(self, video_id, unlisted_hash=None):
         token = self._download_json(
             'https://vimeo.com/_rv/jwt', video_id, headers={
-                'X-Requested-With': 'XMLHttpRequest'
+                'X-Requested-With': 'XMLHttpRequest',
             })['token']
         api_url = 'https://api.vimeo.com/videos/' + video_id
         if unlisted_hash:
@@ -802,7 +802,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             self._set_vimeo_cookie('vuid', viewer['vuid'])
             try:
                 self._download_json(
-                    'https://vimeo.com/showcase/%s/auth' % album_id,
+                    f'https://vimeo.com/showcase/{album_id}/auth',
                     album_id, 'Verifying the password', data=urlencode_postdata({
                         'password': password,
                         'token': viewer['xsrft'],
@@ -829,21 +829,33 @@ class VimeoIE(VimeoBaseInfoExtractor):
             url = 'https://vimeo.com/' + video_id
 
         self._try_album_password(url)
+        is_secure = urllib.parse.urlparse(url).scheme == 'https'
         try:
             # Retrieve video webpage to extract further information
             webpage, urlh = self._download_webpage_handle(
-                url, video_id, headers=headers)
+                url, video_id, headers=headers, impersonate=is_secure)
             redirect_url = urlh.url
-        except ExtractorError as ee:
-            if isinstance(ee.cause, HTTPError) and ee.cause.status == 403:
-                errmsg = ee.cause.response.read()
-                if b'Because of its privacy settings, this video cannot be played here' in errmsg:
-                    raise ExtractorError(
-                        'Cannot download embed-only video without embedding '
-                        'URL. Please call yt-dlp with the URL of the page '
-                        'that embeds this video.',
-                        expected=True)
-            raise
+        except ExtractorError as error:
+            if not isinstance(error.cause, HTTPError) or error.cause.status not in (403, 429):
+                raise
+            errmsg = error.cause.response.read()
+            if b'Because of its privacy settings, this video cannot be played here' in errmsg:
+                raise ExtractorError(
+                    'Cannot download embed-only video without embedding URL. Please call yt-dlp '
+                    'with the URL of the page that embeds this video.', expected=True)
+            # 403 == vimeo.com TLS fingerprint or DC IP block; 429 == player.vimeo.com TLS FP block
+            status = error.cause.status
+            dcip_msg = 'If you are using a data center IP or VPN/proxy, your IP may be blocked'
+            if target := error.cause.response.extensions.get('impersonate'):
+                raise ExtractorError(
+                    f'Got HTTP Error {status} when using impersonate target "{target}". {dcip_msg}')
+            elif not is_secure:
+                raise ExtractorError(f'Got HTTP Error {status}. {dcip_msg}', expected=True)
+            raise ExtractorError(
+                'This request has been blocked due to its TLS fingerprint. Install a '
+                'required impersonation dependency if possible, or else if you are okay with '
+                f'{self._downloader._format_err("compromising your security/cookies", "light red")}, '
+                f'try replacing "https:" with "http:" in the input URL. {dcip_msg}.', expected=True)
 
         if '://player.vimeo.com/video/' in url:
             config = self._search_json(
@@ -864,7 +876,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             seed_status = vimeo_config.get('seed_status') or {}
             if seed_status.get('state') == 'failed':
                 raise ExtractorError(
-                    '%s said: %s' % (self.IE_NAME, seed_status['title']),
+                    '{} said: {}'.format(self.IE_NAME, seed_status['title']),
                     expected=True)
 
         cc_license = None
@@ -916,7 +928,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
             feature_id = vod.get('feature_id')
             if feature_id and not data.get('force_feature_id', False):
                 return self.url_result(smuggle_url(
-                    'https://player.vimeo.com/player/%s' % feature_id,
+                    f'https://player.vimeo.com/player/{feature_id}',
                     {'force_feature_id': True}), 'Vimeo')
 
         if not video_description:
@@ -1051,7 +1063,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
     _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s'
 
     def _page_url(self, base_url, pagenum):
-        return '%s/videos/page:%d/' % (base_url, pagenum)
+        return f'{base_url}/videos/page:{pagenum}/'
 
     def _extract_list_title(self, webpage):
         return self._TITLE or self._html_search_regex(
@@ -1062,7 +1074,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
             page_url = self._page_url(base_url, pagenum)
             webpage = self._download_webpage(
                 page_url, list_id,
-                'Downloading page %s' % pagenum)
+                f'Downloading page {pagenum}')
 
             if pagenum == 1:
                 yield self._extract_list_title(webpage)
@@ -1074,13 +1086,13 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
             if clips:
                 for video_id, video_url, video_title in clips:
                     yield self.url_result(
-                        compat_urlparse.urljoin(base_url, video_url),
+                        urllib.parse.urljoin(base_url, video_url),
                         VimeoIE.ie_key(), video_id=video_id, video_title=video_title)
             # More relaxed fallback
             else:
                 for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
                     yield self.url_result(
-                        'https://vimeo.com/%s' % video_id,
+                        f'https://vimeo.com/{video_id}',
                         VimeoIE.ie_key(), video_id=video_id)
 
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
@@ -1135,7 +1147,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
         'playlist_count': 1,
         'params': {
             'videopassword': 'youtube-dl',
-        }
+        },
     }]
     _PAGE_SIZE = 100
 
@@ -1150,8 +1162,8 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
             query['_hashed_pass'] = hashed_pass
         try:
             videos = self._download_json(
-                'https://api.vimeo.com/albums/%s/videos' % album_id,
-                album_id, 'Downloading page %d' % api_page, query=query, headers={
+                f'https://api.vimeo.com/albums/{album_id}/videos',
+                album_id, f'Downloading page {api_page}', query=query, headers={
                     'Authorization': 'jwt ' + authorization,
                     'Accept': 'application/json',
                 })['data']
@@ -1191,7 +1203,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor):
             self._set_vimeo_cookie('vuid', viewer['vuid'])
             try:
                 hashed_pass = self._download_json(
-                    'https://vimeo.com/showcase/%s/auth' % album_id,
+                    f'https://vimeo.com/showcase/{album_id}/auth',
                     album_id, 'Verifying the password', data=urlencode_postdata({
                         'password': password,
                         'token': viewer['xsrft'],
@@ -1311,7 +1323,7 @@ class VimeoWatchLaterIE(VimeoChannelIE):  # XXX: Do not subclass from concrete I
     }]
 
     def _page_url(self, base_url, pagenum):
-        url = '%s/page:%d/' % (base_url, pagenum)
+        url = f'{base_url}/page:{pagenum}/'
         request = Request(url)
         # Set the header to get a partial html page with the ids,
         # the normal page doesn't contain them.
@@ -1339,11 +1351,11 @@ class VimeoLikesIE(VimeoChannelIE):  # XXX: Do not subclass from concrete IE
     }]
 
     def _page_url(self, base_url, pagenum):
-        return '%s/page:%d/' % (base_url, pagenum)
+        return f'{base_url}/page:{pagenum}/'
 
     def _real_extract(self, url):
         user_id = self._match_id(url)
-        return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id)
+        return self._extract_videos(user_id, f'https://vimeo.com/{user_id}/likes')
 
 
 class VHXEmbedIE(VimeoBaseInfoExtractor):
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-08-05 09:06:11 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-08-05 09:06:11 +0000
commit	fd5a06560caab95c71a2e2e805efa8d0f3a696a0 (patch)
tree	e1c600b8612bc4b301e2f51b875fcd835c5008cc /yt_dlp/extractor/vimeo.py
parent	Releasing progress-linux version 2024.05.27-1~progress7.99u1. (diff)
download	yt-dlp-fd5a06560caab95c71a2e2e805efa8d0f3a696a0.tar.xz yt-dlp-fd5a06560caab95c71a2e2e805efa8d0f3a696a0.zip