diff options
Diffstat (limited to '')
-rw-r--r-- | yt_dlp/extractor/ximalaya.py | 167 |
1 files changed, 167 insertions, 0 deletions
diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py new file mode 100644 index 0000000..ff18ba6 --- /dev/null +++ b/yt_dlp/extractor/ximalaya.py @@ -0,0 +1,167 @@ +import math + +from .common import InfoExtractor +from ..utils import traverse_obj, try_call, InAdvancePagedList + + +class XimalayaBaseIE(InfoExtractor): + _GEO_COUNTRIES = ['CN'] + + +class XimalayaIE(XimalayaBaseIE): + IE_NAME = 'ximalaya' + IE_DESC = '喜马拉雅FM' + _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(:?(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)' + _TESTS = [ + { + 'url': 'http://www.ximalaya.com/sound/47740352/', + 'info_dict': { + 'id': '47740352', + 'ext': 'm4a', + 'uploader': '小彬彬爱听书', + 'uploader_id': 61425525, + 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/', + 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白', + 'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。", + 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnails': [ + { + 'name': 'cover_url', + 'url': r're:^https?://.*\.jpg', + }, + { + 'name': 'cover_url_142', + 'url': r're:^https?://.*\.jpg', + 'width': 180, + 'height': 180 + } + ], + 'categories': ['其他'], + 'duration': 93, + 'view_count': int, + 'like_count': int, + } + }, + { + 'url': 'http://m.ximalaya.com/61425525/sound/47740352/', + 'info_dict': { + 'id': '47740352', + 'ext': 'm4a', + 'uploader': '小彬彬爱听书', + 'uploader_id': 61425525, + 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/', + 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白', + 'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。", + 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnails': [ + { + 'name': 'cover_url', + 'url': r're:^https?://.*\.jpg', + }, + { + 'name': 'cover_url_142', + 'url': r're:^https?://.*\.jpg', + 'width': 180, + 'height': 180 + } + ], + 'categories': ['人文'], + 'duration': 93, + 'view_count': int, + 'like_count': int, + } + } + ] + + def _real_extract(self, url): + scheme = 'https' if url.startswith('https') else 'http' + + audio_id = self._match_id(url) + audio_info_file = '%s://m.ximalaya.com/tracks/%s.json' % (scheme, audio_id) + audio_info = self._download_json(audio_info_file, audio_id, + 'Downloading info json %s' % audio_info_file, + 'Unable to download info file') + + formats = [{ + 'format_id': f'{bps}k', + 'url': audio_info[k], + 'abr': bps, + 'vcodec': 'none' + } for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)] + + thumbnails = [] + for k in audio_info.keys(): + # cover pics kyes like: cover_url', 'cover_url_142' + if k.startswith('cover_url'): + thumbnail = {'name': k, 'url': audio_info[k]} + if k == 'cover_url_142': + thumbnail['width'] = 180 + thumbnail['height'] = 180 + thumbnails.append(thumbnail) + + audio_uploader_id = audio_info.get('uid') + + audio_description = try_call( + lambda: audio_info['intro'].replace('\r\n\r\n\r\n ', '\n').replace('\r\n', '\n')) + + return { + 'id': audio_id, + 'uploader': audio_info.get('nickname'), + 'uploader_id': audio_uploader_id, + 'uploader_url': f'{scheme}://www.ximalaya.com/zhubo/{audio_uploader_id}/' if audio_uploader_id else None, + 'title': audio_info['title'], + 'thumbnails': thumbnails, + 'description': audio_description, + 'categories': list(filter(None, [audio_info.get('category_name')])), + 'duration': audio_info.get('duration'), + 'view_count': audio_info.get('play_count'), + 'like_count': audio_info.get('favorites_count'), + 'formats': formats, + } + + +class XimalayaAlbumIE(XimalayaBaseIE): + IE_NAME = 'ximalaya:album' + IE_DESC = '喜马拉雅FM 专辑' + _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:\d+/)?album/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.ximalaya.com/61425525/album/5534601/', + 'info_dict': { + 'title': '唐诗三百首(含赏析)', + 'id': '5534601', + }, + 'playlist_mincount': 323, + }, { + 'url': 'https://www.ximalaya.com/album/6912905', + 'info_dict': { + 'title': '埃克哈特《修炼当下的力量》', + 'id': '6912905', + }, + 'playlist_mincount': 41, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + first_page = self._fetch_page(playlist_id, 1) + page_count = math.ceil(first_page['trackTotalCount'] / first_page['pageSize']) + + entries = InAdvancePagedList( + lambda idx: self._get_entries(self._fetch_page(playlist_id, idx + 1) if idx else first_page), + page_count, first_page['pageSize']) + + title = traverse_obj(first_page, ('tracks', 0, 'albumTitle'), expected_type=str) + + return self.playlist_result(entries, playlist_id, title) + + def _fetch_page(self, playlist_id, page_idx): + return self._download_json( + 'https://www.ximalaya.com/revision/album/v1/getTracksList', + playlist_id, note=f'Downloading tracks list page {page_idx}', + query={'albumId': playlist_id, 'pageNum': page_idx, 'sort': 1})['data'] + + def _get_entries(self, page_data): + for e in page_data['tracks']: + yield self.url_result( + self._proto_relative_url(f'//www.ximalaya.com{e["url"]}'), + XimalayaIE, e.get('trackId'), e.get('title')) |