diff options
Diffstat (limited to '')
-rw-r--r-- | yt_dlp/extractor/soundcloud.py | 106 |
1 files changed, 57 insertions, 49 deletions
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 3581461..0c6f0b0 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -4,13 +4,11 @@ import json import re from .common import InfoExtractor, SearchInfoExtractor -from ..compat import compat_str from ..networking import HEADRequest from ..networking.exceptions import HTTPError from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, - error_to_compat_str, float_or_none, int_or_none, join_nonempty, @@ -97,7 +95,7 @@ class SoundcloudBaseIE(InfoExtractor): return raise ExtractorError('Unable to extract client id') - def _download_json(self, *args, **kwargs): + def _call_api(self, *args, **kwargs): non_fatal = kwargs.get('fatal') is False if non_fatal: del kwargs['fatal'] @@ -106,14 +104,14 @@ class SoundcloudBaseIE(InfoExtractor): query['client_id'] = self._CLIENT_ID kwargs['query'] = query try: - return super()._download_json(*args, **kwargs) + return self._download_json(*args, **kwargs) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403): self._store_client_id(None) self._update_client_id() continue elif non_fatal: - self.report_warning(error_to_compat_str(e)) + self.report_warning(str(e)) return False raise @@ -165,7 +163,7 @@ class SoundcloudBaseIE(InfoExtractor): 'user_agent': self._USER_AGENT } - response = self._download_json( + response = self._call_api( self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID), None, note='Verifying login token...', fatal=False, data=json.dumps(payload).encode()) @@ -196,24 +194,20 @@ class SoundcloudBaseIE(InfoExtractor): t = clid # _CLIENT_ID d = '-'.join([str(mInt) for mInt in [a, i, s, w, u, l, b, k]]) - p = n + y + d + r + e + t + d + n - h = p + h = n + y + d + r + e + t + d + n m = 8011470 - f = 0 - for f in range(f, len(h)): + for f in range(len(h)): m = (m >> 1) + ((1 & m) << 23) m += ord(h[f]) m &= 16777215 # c is not even needed - out = str(y) + ':' + str(d) + ':' + format(m, 'x') + ':' + str(c) - - return out + return f'{y}:{d}:{m:x}:{c}' def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False): - track_id = compat_str(info['id']) + track_id = str(info['id']) title = info['title'] format_urls = set() @@ -223,12 +217,26 @@ class SoundcloudBaseIE(InfoExtractor): query['secret_token'] = secret_token if not extract_flat and info.get('downloadable') and info.get('has_downloads_left'): - download_url = update_url_query( - self._API_V2_BASE + 'tracks/' + track_id + '/download', query) - redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri') - if redirect_url: + try: + # Do not use _call_api(); HTTP Error codes have different meanings for this request + download_data = self._download_json( + f'{self._API_V2_BASE}tracks/{track_id}/download', track_id, + 'Downloading original download format info JSON', query=query, headers=self._HEADERS) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + self.report_warning( + 'Original download format is only available ' + f'for registered users. {self._login_hint()}') + elif isinstance(e.cause, HTTPError) and e.cause.status == 403: + self.write_debug('Original download format is not available for this client') + else: + self.report_warning(e.msg) + download_data = None + + if redirect_url := traverse_obj(download_data, ('redirectUri', {url_or_none})): urlh = self._request_webpage( - HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False) + HEADRequest(redirect_url), track_id, 'Checking original download format availability', + 'Original download format is not available', fatal=False) if urlh: format_url = urlh.url format_urls.add(format_url) @@ -309,7 +317,7 @@ class SoundcloudBaseIE(InfoExtractor): stream = None for retry in self.RetryManager(fatal=False): try: - stream = self._download_json( + stream = self._call_api( format_url, track_id, f'Downloading {identifier} format info JSON', query=query, headers=self._HEADERS) except ExtractorError as e: @@ -342,12 +350,12 @@ class SoundcloudBaseIE(InfoExtractor): thumbnails = [] artwork_url = info.get('artwork_url') thumbnail = artwork_url or user.get('avatar_url') - if isinstance(thumbnail, compat_str): + if isinstance(thumbnail, str): if re.search(self._IMAGE_REPL_RE, thumbnail): for image_id, size in self._ARTWORK_MAP.items(): i = { 'id': image_id, - 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail), + 'url': re.sub(self._IMAGE_REPL_RE, f'-{image_id}.jpg', thumbnail), } if image_id == 'tiny' and not artwork_url: size = 18 @@ -363,7 +371,7 @@ class SoundcloudBaseIE(InfoExtractor): thumbnails = [{'url': thumbnail}] def extract_count(key): - return int_or_none(info.get('%s_count' % key)) + return int_or_none(info.get(f'{key}_count')) return { 'id': track_id, @@ -382,7 +390,7 @@ class SoundcloudBaseIE(InfoExtractor): 'comment_count': extract_count('comment'), 'repost_count': extract_count('reposts'), 'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)), - 'formats': formats if not extract_flat else None + 'formats': formats if not extract_flat else None, } @classmethod @@ -434,7 +442,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg', 'uploader_url': 'https://soundcloud.com/ethmusic', 'genres': [], - } + }, }, # geo-restricted { @@ -467,7 +475,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'id': '123998367', 'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'description': 'test chars: \"\'/\\ä↭', + 'description': 'test chars: "\'/\\ä↭', 'uploader': 'jaimeMF', 'uploader_id': '69767071', 'timestamp': 1386604920, @@ -491,7 +499,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'id': '123998367', 'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'description': 'test chars: \"\'/\\ä↭', + 'description': 'test chars: "\'/\\ä↭', 'uploader': 'jaimeMF', 'uploader_id': '69767071', 'timestamp': 1386604920, @@ -630,13 +638,13 @@ class SoundcloudIE(SoundcloudBaseIE): if token: query['secret_token'] = token else: - full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title') + full_title = resolve_title = '{}/{}'.format(*mobj.group('uploader', 'title')) token = mobj.group('token') if token: - resolve_title += '/%s' % token + resolve_title += f'/{token}' info_json_url = self._resolv_url(self._BASE_URL + resolve_title) - info = self._download_json( + info = self._call_api( info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS) return self._extract_info_dict(info, full_title, token) @@ -644,13 +652,13 @@ class SoundcloudIE(SoundcloudBaseIE): class SoundcloudPlaylistBaseIE(SoundcloudBaseIE): def _extract_set(self, playlist, token=None): - playlist_id = compat_str(playlist['id']) + playlist_id = str(playlist['id']) tracks = playlist.get('tracks') or [] - if not all([t.get('permalink_url') for t in tracks]) and token: - tracks = self._download_json( + if not all(t.get('permalink_url') for t in tracks) and token: + tracks = self._call_api( self._API_V2_BASE + 'tracks', playlist_id, 'Downloading tracks', query={ - 'ids': ','.join([compat_str(t['id']) for t in tracks]), + 'ids': ','.join([str(t['id']) for t in tracks]), 'playlistId': playlist_id, 'playlistSecretToken': token, }, headers=self._HEADERS) @@ -700,17 +708,17 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): def _real_extract(self, url): mobj = self._match_valid_url(url) - full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title') + full_title = '{}/sets/{}'.format(*mobj.group('uploader', 'slug_title')) token = mobj.group('token') if token: full_title += '/' + token - info = self._download_json(self._resolv_url( + info = self._call_api(self._resolv_url( self._BASE_URL + full_title), full_title, headers=self._HEADERS) if 'errors' in info: - msgs = (compat_str(err['error_message']) for err in info['errors']) - raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) + msgs = (str(err['error_message']) for err in info['errors']) + raise ExtractorError('unable to download video webpage: {}'.format(','.join(msgs))) return self._extract_set(info, token) @@ -736,7 +744,7 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE): for i in itertools.count(): for retry in self.RetryManager(): try: - response = self._download_json( + response = self._call_api( url, playlist_id, query=query, headers=self._HEADERS, note=f'Downloading track page {i + 1}') break @@ -844,7 +852,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): mobj = self._match_valid_url(url) uploader = mobj.group('user') - user = self._download_json( + user = self._call_api( self._resolv_url(self._BASE_URL + uploader), uploader, 'Downloading user info', headers=self._HEADERS) @@ -853,7 +861,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): return self._extract_playlist( self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'], str_or_none(user.get('id')), - '%s (%s)' % (user['username'], resource.capitalize())) + '{} ({})'.format(user['username'], resource.capitalize())) class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE): @@ -870,7 +878,7 @@ class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): user_id = self._match_id(url) - user = self._download_json( + user = self._call_api( self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS) return self._extract_playlist( @@ -892,13 +900,13 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): track_name = self._match_id(url) - track = self._download_json(self._resolv_url(url), track_name, headers=self._HEADERS) + track = self._call_api(self._resolv_url(url), track_name, headers=self._HEADERS) track_id = self._search_regex( r'soundcloud:track-stations:(\d+)', track['id'], 'track id') return self._extract_playlist( - self._API_V2_BASE + 'stations/%s/tracks' % track['id'], - track_id, 'Track station: %s' % track['title']) + self._API_V2_BASE + 'stations/{}/tracks'.format(track['id']), + track_id, 'Track station: {}'.format(track['title'])) class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE): @@ -936,7 +944,7 @@ class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): slug, relation = self._match_valid_url(url).group('slug', 'relation') - track = self._download_json( + track = self._call_api( self._resolv_url(self._BASE_URL + slug), slug, 'Downloading track info', headers=self._HEADERS) @@ -946,7 +954,7 @@ class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE): return self._extract_playlist( self._API_V2_BASE + self._BASE_URL_MAP[relation] % track['id'], str(track['id']), - '%s (%s)' % (track.get('title') or slug, relation.capitalize())) + '{} ({})'.format(track.get('title') or slug, relation.capitalize())) class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): @@ -971,7 +979,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): if token: query['secret_token'] = token - data = self._download_json( + data = self._call_api( self._API_V2_BASE + 'playlists/' + playlist_id, playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS) @@ -1006,7 +1014,7 @@ class SoundcloudSearchIE(SoundcloudBaseIE, SearchInfoExtractor): next_url = update_url_query(self._API_V2_BASE + endpoint, query) for i in itertools.count(1): - response = self._download_json( + response = self._call_api( next_url, collection_id, f'Downloading page {i}', 'Unable to download API page', headers=self._HEADERS) |