diff options
Diffstat (limited to 'yt_dlp/extractor/soundcloud.py')
-rw-r--r-- | yt_dlp/extractor/soundcloud.py | 108 |
1 files changed, 81 insertions, 27 deletions
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index c9ed645..3581461 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -1,3 +1,4 @@ +import functools import itertools import json import re @@ -12,6 +13,7 @@ from ..utils import ( error_to_compat_str, float_or_none, int_or_none, + join_nonempty, mimetype2ext, parse_qs, str_or_none, @@ -68,6 +70,16 @@ class SoundcloudBaseIE(InfoExtractor): 'original': 0, } + _DEFAULT_FORMATS = ['http_aac', 'hls_aac', 'http_opus', 'hls_opus', 'http_mp3', 'hls_mp3'] + + @functools.cached_property + def _is_requested(self): + return re.compile(r'|'.join(set( + re.escape(pattern).replace(r'\*', r'.*') if pattern != 'default' + else '|'.join(map(re.escape, self._DEFAULT_FORMATS)) + for pattern in self._configuration_arg('formats', ['default'], ie_key=SoundcloudIE) + ))).fullmatch + def _store_client_id(self, client_id): self.cache.store('soundcloud', 'client_id', client_id) @@ -216,7 +228,7 @@ class SoundcloudBaseIE(InfoExtractor): redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri') if redirect_url: urlh = self._request_webpage( - HEADRequest(redirect_url), track_id, fatal=False) + HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False) if urlh: format_url = urlh.url format_urls.add(format_url) @@ -258,7 +270,7 @@ class SoundcloudBaseIE(InfoExtractor): abr = f.get('abr') if abr: f['abr'] = int(abr) - if protocol == 'hls': + if protocol in ('hls', 'hls-aes'): protocol = 'm3u8' if ext == 'aac' else 'm3u8_native' else: protocol = 'http' @@ -274,11 +286,32 @@ class SoundcloudBaseIE(InfoExtractor): if extract_flat: break format_url = t['url'] - stream = None + protocol = traverse_obj(t, ('format', 'protocol', {str})) + if protocol == 'progressive': + protocol = 'http' + if protocol != 'hls' and '/hls' in format_url: + protocol = 'hls' + if protocol == 'encrypted-hls' or '/encrypted-hls' in format_url: + protocol = 'hls-aes' + + ext = None + if preset := traverse_obj(t, ('preset', {str_or_none})): + ext = preset.split('_')[0] + if ext not in KNOWN_EXTENSIONS: + ext = mimetype2ext(traverse_obj(t, ('format', 'mime_type', {str}))) + + identifier = join_nonempty(protocol, ext, delim='_') + if not self._is_requested(identifier): + self.write_debug(f'"{identifier}" is not a requested format, skipping') + continue + + stream = None for retry in self.RetryManager(fatal=False): try: - stream = self._download_json(format_url, track_id, query=query, headers=self._HEADERS) + stream = self._download_json( + format_url, track_id, f'Downloading {identifier} format info JSON', + query=query, headers=self._HEADERS) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 429: self.report_warning( @@ -289,27 +322,14 @@ class SoundcloudBaseIE(InfoExtractor): else: self.report_warning(e.msg) - if not isinstance(stream, dict): - continue - stream_url = url_or_none(stream.get('url')) + stream_url = traverse_obj(stream, ('url', {url_or_none})) if invalid_url(stream_url): continue format_urls.add(stream_url) - stream_format = t.get('format') or {} - protocol = stream_format.get('protocol') - if protocol != 'hls' and '/hls' in format_url: - protocol = 'hls' - ext = None - preset = str_or_none(t.get('preset')) - if preset: - ext = preset.split('_')[0] - if ext not in KNOWN_EXTENSIONS: - ext = mimetype2ext(stream_format.get('mime_type')) add_format({ 'url': stream_url, 'ext': ext, - }, 'http' if protocol == 'progressive' else protocol, - t.get('snipped') or '/preview/' in format_url) + }, protocol, t.get('snipped') or '/preview/' in format_url) for f in formats: f['vcodec'] = 'none' @@ -361,7 +381,7 @@ class SoundcloudBaseIE(InfoExtractor): 'like_count': extract_count('favoritings') or extract_count('likes'), 'comment_count': extract_count('comment'), 'repost_count': extract_count('reposts'), - 'genre': info.get('genre'), + 'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)), 'formats': formats if not extract_flat else None } @@ -395,10 +415,10 @@ class SoundcloudIE(SoundcloudBaseIE): _TESTS = [ { 'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy', - 'md5': 'ebef0a451b909710ed1d7787dddbf0d7', + 'md5': 'de9bac153e7427a7333b4b0c1b6a18d2', 'info_dict': { 'id': '62986583', - 'ext': 'mp3', + 'ext': 'opus', 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1', 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d', 'uploader': 'E.T. ExTerrestrial Music', @@ -411,6 +431,9 @@ class SoundcloudIE(SoundcloudBaseIE): 'like_count': int, 'comment_count': int, 'repost_count': int, + 'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg', + 'uploader_url': 'https://soundcloud.com/ethmusic', + 'genres': [], } }, # geo-restricted @@ -418,7 +441,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', 'info_dict': { 'id': '47127627', - 'ext': 'mp3', + 'ext': 'opus', 'title': 'Goldrushed', 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'uploader': 'The Royal Concept', @@ -431,6 +454,9 @@ class SoundcloudIE(SoundcloudBaseIE): 'like_count': int, 'comment_count': int, 'repost_count': int, + 'uploader_url': 'https://soundcloud.com/the-concept-band', + 'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg', + 'genres': ['Alternative'], }, }, # private link @@ -452,6 +478,9 @@ class SoundcloudIE(SoundcloudBaseIE): 'like_count': int, 'comment_count': int, 'repost_count': int, + 'uploader_url': 'https://soundcloud.com/jaimemf', + 'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png', + 'genres': ['youtubedl'], }, }, # private link (alt format) @@ -473,6 +502,9 @@ class SoundcloudIE(SoundcloudBaseIE): 'like_count': int, 'comment_count': int, 'repost_count': int, + 'uploader_url': 'https://soundcloud.com/jaimemf', + 'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png', + 'genres': ['youtubedl'], }, }, # downloadable song @@ -482,6 +514,21 @@ class SoundcloudIE(SoundcloudBaseIE): 'info_dict': { 'id': '343609555', 'ext': 'wav', + 'title': 'The Following', + 'description': '', + 'uploader': '80M', + 'uploader_id': '312384765', + 'uploader_url': 'https://soundcloud.com/the80m', + 'upload_date': '20170922', + 'timestamp': 1506120436, + 'duration': 397.228, + 'thumbnail': 'https://i1.sndcdn.com/artworks-000243916348-ktoo7d-original.jpg', + 'license': 'all-rights-reserved', + 'like_count': int, + 'comment_count': int, + 'repost_count': int, + 'view_count': int, + 'genres': ['Dance & EDM'], }, }, # private link, downloadable format @@ -503,6 +550,9 @@ class SoundcloudIE(SoundcloudBaseIE): 'like_count': int, 'comment_count': int, 'repost_count': int, + 'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg', + 'uploader_url': 'https://soundcloud.com/oriuplift', + 'genres': ['Trance'], }, }, # no album art, use avatar pic for thumbnail @@ -525,6 +575,8 @@ class SoundcloudIE(SoundcloudBaseIE): 'like_count': int, 'comment_count': int, 'repost_count': int, + 'uploader_url': 'https://soundcloud.com/garyvee', + 'genres': [], }, 'params': { 'skip_download': True, @@ -532,13 +584,13 @@ class SoundcloudIE(SoundcloudBaseIE): }, { 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer', - 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7', + 'md5': '8227c3473a4264df6b02ad7e5b7527ac', 'info_dict': { 'id': '583011102', - 'ext': 'mp3', + 'ext': 'opus', 'title': 'Mezzo Valzer', - 'description': 'md5:4138d582f81866a530317bae316e8b61', - 'uploader': 'Micronie', + 'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a', + 'uploader': 'Giovanni Sarani', 'uploader_id': '3352531', 'timestamp': 1551394171, 'upload_date': '20190228', @@ -549,6 +601,8 @@ class SoundcloudIE(SoundcloudBaseIE): 'like_count': int, 'comment_count': int, 'repost_count': int, + 'genres': ['Piano'], + 'uploader_url': 'https://soundcloud.com/giovannisarani', }, }, { |