summaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/bilibili.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor/bilibili.py')
-rw-r--r--yt_dlp/extractor/bilibili.py494
1 files changed, 331 insertions, 163 deletions
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index b38c90b..a84b7a6 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -31,12 +31,12 @@ from ..utils import (
mimetype2ext,
parse_count,
parse_qs,
+ parse_resolution,
qualities,
smuggle_url,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
- try_call,
unified_timestamp,
unsmuggle_url,
url_or_none,
@@ -47,6 +47,23 @@ from ..utils import (
class BilibiliBaseIE(InfoExtractor):
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
+ _WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
+ _wbi_key_cache = {}
+
+ @property
+ def is_logged_in(self):
+ return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
+
+ def _check_missing_formats(self, play_info, formats):
+ parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
+ missing_formats = join_nonempty(*[
+ traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
+ for fmt in traverse_obj(play_info, (
+ 'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
+ if missing_formats:
+ self.to_screen(
+ f'Format(s) {missing_formats} are missing; you have to login or '
+ f'become a premium member to download them. {self._login_hint()}')
def extract_formats(self, play_info):
format_names = {
@@ -86,18 +103,75 @@ class BilibiliBaseIE(InfoExtractor):
'format': format_names.get(video.get('id')),
} for video in traverse_obj(play_info, ('dash', 'video', ...)))
- missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
- if missing_formats:
- self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
- f'you have to login or become premium member to download them. {self._login_hint()}')
+ if formats:
+ self._check_missing_formats(play_info, formats)
+ fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
+ 'url': ('url', {url_or_none}),
+ 'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
+ 'filesize': ('size', {int_or_none}),
+ }))
+ if fragments:
+ formats.append({
+ 'url': fragments[0]['url'],
+ 'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
+ **({
+ 'fragments': fragments,
+ 'protocol': 'http_dash_segments',
+ } if len(fragments) > 1 else {}),
+ **traverse_obj(play_info, {
+ 'quality': ('quality', {int_or_none}),
+ 'format_id': ('quality', {str_or_none}),
+ 'format_note': ('quality', {lambda x: format_names.get(x)}),
+ 'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
+ }),
+ **parse_resolution(format_names.get(play_info.get('quality'))),
+ })
return formats
- def _download_playinfo(self, video_id, cid, headers=None):
+ def _get_wbi_key(self, video_id):
+ if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
+ return self._wbi_key_cache['key']
+
+ session_data = self._download_json(
+ 'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
+
+ lookup = ''.join(traverse_obj(session_data, (
+ 'data', 'wbi_img', ('img_url', 'sub_url'),
+ {lambda x: x.rpartition('/')[2].partition('.')[0]})))
+
+ # from getMixinKey() in the vendor js
+ mixin_key_enc_tab = [
+ 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
+ 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
+ 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
+ 36, 20, 34, 44, 52,
+ ]
+
+ self._wbi_key_cache.update({
+ 'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
+ 'ts': time.time(),
+ })
+ return self._wbi_key_cache['key']
+
+ def _sign_wbi(self, params, video_id):
+ params['wts'] = round(time.time())
+ params = {
+ k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
+ for k, v in sorted(params.items())
+ }
+ query = urllib.parse.urlencode(params)
+ params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
+ return params
+
+ def _download_playinfo(self, bvid, cid, headers=None, qn=None):
+ params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
+ if qn:
+ params['qn'] = qn
return self._download_json(
- 'https://api.bilibili.com/x/player/playurl', video_id,
- query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
- note=f'Downloading video formats for cid {cid}', headers=headers)['data']
+ 'https://api.bilibili.com/x/player/wbi/playurl', bvid,
+ query=self._sign_wbi(params, bvid), headers=headers,
+ note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
def json2srt(self, json_data):
srt_data = ''
@@ -112,21 +186,21 @@ class BilibiliBaseIE(InfoExtractor):
'danmaku': [{
'ext': 'xml',
'url': f'https://comment.bilibili.com/{cid}.xml',
- }]
+ }],
}
- subtitle_info = traverse_obj(self._download_json(
+ video_info = self._download_json(
'https://api.bilibili.com/x/player/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
- note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
- subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
- if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
- if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
- self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
- for s in subs_list:
+ note=f'Extracting subtitle info {cid}')
+ if traverse_obj(video_info, ('data', 'need_login_subtitle')):
+ self.report_warning(
+ f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
+ for s in traverse_obj(video_info, (
+ 'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
subtitles.setdefault(s['lan'], []).append({
'ext': 'srt',
- 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
+ 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
})
return subtitles
@@ -203,19 +277,19 @@ class BilibiliBaseIE(InfoExtractor):
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
return cid_edges
- def _get_interactive_entries(self, video_id, cid, metainfo):
+ def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
graph_version = traverse_obj(
self._download_json(
'https://api.bilibili.com/x/player/wbi/v2', video_id,
- 'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
+ 'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
('data', 'interaction', 'graph_version', {int_or_none}))
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
for cid, edges in cid_edges.items():
- play_info = self._download_playinfo(video_id, cid)
+ play_info = self._download_playinfo(video_id, cid, headers=headers)
yield {
**metainfo,
'id': f'{video_id}_{cid}',
- 'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
+ 'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
'formats': self.extract_formats(play_info),
'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
'duration': float_or_none(play_info.get('timelength'), scale=1000),
@@ -243,17 +317,17 @@ class BiliBiliIE(BilibiliBaseIE):
'timestamp': 1488353834,
'like_count': int,
'view_count': int,
+ '_old_archive_ids': ['bilibili 8903802_part1'],
},
}, {
'note': 'old av URL version',
'url': 'http://www.bilibili.com/video/av1074402/',
'info_dict': {
- 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
+ 'id': 'BV11x411K7CN',
'ext': 'mp4',
+ 'title': '【金坷垃】金泡沫',
'uploader': '菊子桑',
'uploader_id': '156160',
- 'id': 'BV11x411K7CN',
- 'title': '【金坷垃】金泡沫',
'duration': 308.36,
'upload_date': '20140420',
'timestamp': 1397983878,
@@ -262,6 +336,8 @@ class BiliBiliIE(BilibiliBaseIE):
'comment_count': int,
'view_count': int,
'tags': list,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
+ '_old_archive_ids': ['bilibili 1074402_part1'],
},
'params': {'skip_download': True},
}, {
@@ -269,7 +345,7 @@ class BiliBiliIE(BilibiliBaseIE):
'url': 'https://www.bilibili.com/video/BV1bK411W797',
'info_dict': {
'id': 'BV1bK411W797',
- 'title': '物语中的人物是如何吐槽自己的OP的'
+ 'title': '物语中的人物是如何吐槽自己的OP的',
},
'playlist_count': 18,
'playlist': [{
@@ -288,8 +364,9 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314,
- }
- }]
+ '_old_archive_ids': ['bilibili 498159642_part1'],
+ },
+ }],
}, {
'note': 'Specific page of Anthology',
'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
@@ -308,28 +385,8 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314,
- }
- }, {
- 'note': 'video has subtitles',
- 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
- 'info_dict': {
- 'id': 'BV12N4y1M7rh',
- 'ext': 'mp4',
- 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
- 'tags': list,
- 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
- 'duration': 313.557,
- 'upload_date': '20220709',
- 'uploader': '小夫太渴',
- 'timestamp': 1657347907,
- 'uploader_id': '1326814124',
- 'comment_count': int,
- 'view_count': int,
- 'like_count': int,
- 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
- 'subtitles': 'count:2'
+ '_old_archive_ids': ['bilibili 498159642_part1'],
},
- 'params': {'listsubtitles': True},
}, {
'url': 'https://www.bilibili.com/video/av8903802/',
'info_dict': {
@@ -347,6 +404,7 @@ class BiliBiliIE(BilibiliBaseIE):
'comment_count': int,
'view_count': int,
'like_count': int,
+ '_old_archive_ids': ['bilibili 8903802_part1'],
},
'params': {
'skip_download': True,
@@ -370,6 +428,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ '_old_archive_ids': ['bilibili 463665680_part1'],
},
'params': {'skip_download': True},
}, {
@@ -388,8 +447,8 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ '_old_archive_ids': ['bilibili 893839363_part1'],
},
- 'params': {'skip_download': True},
}, {
'note': 'newer festival video',
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
@@ -406,8 +465,57 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ '_old_archive_ids': ['bilibili 778246196_part1'],
+ },
+ }, {
+ 'note': 'legacy flv/mp4 video',
+ 'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
+ 'info_dict': {
+ 'id': 'BV1ms411Q7vw_p4',
+ 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
+ 'timestamp': 1458222815,
+ 'upload_date': '20160317',
+ 'description': '云南方言快乐生产线出品',
+ 'duration': float,
+ 'uploader': '一笑颠天',
+ 'uploader_id': '3916081',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
+ 'tags': list,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ '_old_archive_ids': ['bilibili 4120229_part4'],
+ },
+ 'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
+ 'playlist_count': 19,
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'BV1ms411Q7vw_p4_0',
+ 'ext': 'flv',
+ 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
+ 'duration': 399.102,
+ },
+ }],
+ }, {
+ 'note': 'legacy mp4-only video',
+ 'url': 'https://www.bilibili.com/video/BV1nx411u79K',
+ 'info_dict': {
+ 'id': 'BV1nx411u79K',
+ 'ext': 'mp4',
+ 'title': '【练习室】201603声乐练习《No Air》with VigoVan',
+ 'timestamp': 1508893551,
+ 'upload_date': '20171025',
+ 'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
+ 'duration': 80.384,
+ 'uploader': '伯远',
+ 'uploader_id': '10584494',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'tags': list,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ '_old_archive_ids': ['bilibili 15700301_part1'],
},
- 'params': {'skip_download': True},
}, {
'note': 'interactive/split-path video',
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
@@ -425,6 +533,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ '_old_archive_ids': ['bilibili 292734508_part1'],
},
'playlist_count': 33,
'playlist': [{
@@ -443,6 +552,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ '_old_archive_ids': ['bilibili 292734508_part1'],
},
}],
}, {
@@ -466,6 +576,29 @@ class BiliBiliIE(BilibiliBaseIE):
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}, {
+ 'note': 'video has subtitles, which requires login',
+ 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
+ 'info_dict': {
+ 'id': 'BV12N4y1M7rh',
+ 'ext': 'mp4',
+ 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
+ 'tags': list,
+ 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
+ 'duration': 313.557,
+ 'upload_date': '20220709',
+ 'uploader': '小夫太渴',
+ 'timestamp': 1657347907,
+ 'uploader_id': '1326814124',
+ 'comment_count': int,
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ 'subtitles': 'count:2', # login required for CC subtitle
+ '_old_archive_ids': ['bilibili 898179753_part1'],
+ },
+ 'params': {'listsubtitles': True},
+ 'skip': 'login required for subtitle',
+ }, {
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
'info_dict': {
'id': 'BV1jL41167ZG',
@@ -498,8 +631,9 @@ class BiliBiliIE(BilibiliBaseIE):
if not self._match_valid_url(urlh.url):
return self.url_result(urlh.url)
- initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
+ headers['Referer'] = url
+ initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
is_festival = 'videoData' not in initial_state
if is_festival:
video_data = initial_state['videoInfo']
@@ -548,7 +682,6 @@ class BiliBiliIE(BilibiliBaseIE):
aid = video_data.get('aid')
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
-
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
festival_info = {}
@@ -586,19 +719,65 @@ class BiliBiliIE(BilibiliBaseIE):
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
if is_interactive:
return self.playlist_result(
- self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
- 'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
- '__post_extractor': self.extract_comments(aid),
- })
+ self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
+ duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
+ __post_extractor=self.extract_comments(aid))
else:
- return {
- **metainfo,
- 'duration': float_or_none(play_info.get('timelength'), scale=1000),
- 'chapters': self._get_chapters(aid, cid),
- 'subtitles': self.extract_subtitles(video_id, cid),
- 'formats': self.extract_formats(play_info),
- '__post_extractor': self.extract_comments(aid),
- }
+ formats = self.extract_formats(play_info)
+
+ if not traverse_obj(play_info, ('dash')):
+ # we only have legacy formats and need additional work
+ has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
+ for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
+ formats.extend(traverse_obj(
+ self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
+ lambda _, v: not has_qn(v['quality'])))
+ self._check_missing_formats(play_info, formats)
+ flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
+ if flv_formats and len(flv_formats) < len(formats):
+ # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
+ if not self._configuration_arg('prefer_multi_flv'):
+ dropped_fmts = ', '.join(
+ f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
+ formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
+ if dropped_fmts:
+ self.to_screen(
+ f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
+ 'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
+ else:
+ formats = traverse_obj(
+ # XXX: Filtering by extractor-arg is for testing purposes
+ formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
+ ) or [max(flv_formats, key=lambda x: x['quality'])]
+
+ if traverse_obj(formats, (0, 'fragments')):
+ # We have flv formats, which are individual short videos with their own timestamps and metainfo
+ # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
+ return {
+ **metainfo,
+ '_type': 'multi_video',
+ 'entries': [{
+ 'id': f'{metainfo["id"]}_{idx}',
+ 'title': metainfo['title'],
+ 'http_headers': metainfo['http_headers'],
+ 'formats': [{
+ **fragment,
+ 'format_id': formats[0].get('format_id'),
+ }],
+ 'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
+ '__post_extractor': self.extract_comments(aid) if idx == 0 else None,
+ } for idx, fragment in enumerate(formats[0]['fragments'])],
+ 'duration': float_or_none(play_info.get('timelength'), scale=1000),
+ }
+ else:
+ return {
+ **metainfo,
+ 'formats': formats,
+ 'duration': float_or_none(play_info.get('timelength'), scale=1000),
+ 'chapters': self._get_chapters(aid, cid),
+ 'subtitles': self.extract_subtitles(video_id, cid),
+ '__post_extractor': self.extract_comments(aid),
+ }
class BiliBiliBangumiIE(BilibiliBaseIE):
@@ -640,7 +819,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'duration': 1425.256,
'timestamp': 1554566400,
'upload_date': '20190406',
- 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
'skip': 'Geo-restricted',
}, {
@@ -661,7 +840,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
'duration': 1922.129,
'timestamp': 1602853860,
'upload_date': '20201016',
- 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}]
@@ -764,7 +943,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE):
'duration': 1525.777,
'timestamp': 1425074413,
'upload_date': '20150227',
- 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}],
}]
@@ -794,7 +973,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
'title': '鬼灭之刃',
'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
},
- 'playlist_mincount': 26
+ 'playlist_mincount': 26,
}, {
'url': 'https://www.bilibili.com/bangumi/play/ss2251',
'info_dict': {
@@ -819,7 +998,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
'duration': 1436.992,
'timestamp': 1343185080,
'upload_date': '20120725',
- 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
}],
}]
@@ -906,7 +1085,7 @@ class BilibiliCheeseIE(BilibiliCheeseBaseIE):
'upload_date': '20230924',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'view_count': int,
- }
+ },
}]
def _real_extract(self, url):
@@ -939,7 +1118,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
'upload_date': '20230924',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'view_count': int,
- }
+ },
}],
'params': {'playlist_items': '1'},
}, {
@@ -969,7 +1148,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
}))
-class BilibiliSpaceBaseIE(InfoExtractor):
+class BilibiliSpaceBaseIE(BilibiliBaseIE):
def _extract_playlist(self, fetch_page, get_metadata, get_entries):
first_page = fetch_page(0)
metadata = get_metadata(first_page)
@@ -989,73 +1168,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
'id': '3985676',
},
'playlist_mincount': 178,
+ 'skip': 'login required',
}, {
'url': 'https://space.bilibili.com/313580179/video',
'info_dict': {
'id': '313580179',
},
'playlist_mincount': 92,
+ 'skip': 'login required',
}]
- def _extract_signature(self, playlist_id):
- session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
-
- key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
- img_key = traverse_obj(
- session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
- sub_key = traverse_obj(
- session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
-
- session_key = img_key + sub_key
-
- signature_values = []
- for position in (
- 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
- 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
- 57, 62, 11, 36, 20, 34, 44, 52
- ):
- char_at_position = try_call(lambda: session_key[position])
- if char_at_position:
- signature_values.append(char_at_position)
-
- return ''.join(signature_values)[:32]
-
def _real_extract(self, url):
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
if not is_video_url:
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
'To download audios, add a "/audio" to the URL')
- signature = self._extract_signature(playlist_id)
-
def fetch_page(page_idx):
query = {
'keyword': '',
'mid': playlist_id,
- 'order': 'pubdate',
+ 'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
'order_avoided': 'true',
'platform': 'web',
'pn': page_idx + 1,
'ps': 30,
'tid': 0,
'web_location': 1550101,
- 'wts': int(time.time()),
}
- query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
try:
- response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
- playlist_id, note=f'Downloading page {page_idx}', query=query,
- headers={'referer': url})
+ response = self._download_json(
+ 'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
+ query=self._sign_wbi(query, playlist_id),
+ note=f'Downloading space page {page_idx}', headers={'Referer': url})
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
raise ExtractorError(
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
raise
- if response['code'] in (-352, -401):
+ status_code = response['code']
+ if status_code == -401:
raise ExtractorError(
- f'Request is blocked by server ({-response["code"]}), '
- 'please add cookies, wait and try later.', expected=True)
+ 'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
+ elif status_code == -352 and not self.is_logged_in:
+ self.raise_login_required('Request is rejected, you need to login to access playlist')
+ elif status_code != 0:
+ raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
return response['data']
def get_metadata(page_data):
@@ -1163,7 +1322,7 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
'uploader_id': ('meta', 'mid', {str_or_none}),
'timestamp': ('meta', 'ptime', {int_or_none}),
'thumbnail': ('meta', 'cover', {url_or_none}),
- })
+ }),
}
def get_entries(page_data):
@@ -1195,7 +1354,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
mid, sid = self._match_valid_url(url).group('mid', 'sid')
playlist_id = f'{mid}_{sid}'
playlist_meta = traverse_obj(self._download_json(
- f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
+ f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
), {
'title': ('data', 'meta', 'name', {str}),
'description': ('data', 'meta', 'description', {str}),
@@ -1217,7 +1376,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
'page_count': math.ceil(entry_count / page_size),
'page_size': page_size,
'uploader': self._get_uploader(mid, playlist_id),
- **playlist_meta
+ **playlist_meta,
}
def get_entries(page_data):
@@ -1241,7 +1400,7 @@ class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
'upload_date': '20201109',
'modified_timestamp': int,
'modified_date': str,
- 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
+ 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
'view_count': int,
'like_count': int,
},
@@ -1281,7 +1440,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
_TESTS = [{
'url': 'https://www.bilibili.com/watchlater/#/list',
- 'info_dict': {'id': 'watchlater'},
+ 'info_dict': {
+ 'id': r're:\d+',
+ 'title': '稍后再看',
+ },
'playlist_mincount': 0,
'skip': 'login required',
}]
@@ -1345,7 +1507,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'uploader_id': '84912',
'timestamp': 1604905176,
'upload_date': '20201109',
- 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
+ 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
},
'playlist_mincount': 22,
}, {
@@ -1357,21 +1519,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'skip': 'redirect url',
}, {
'url': 'https://www.bilibili.com/list/watchlater',
- 'info_dict': {'id': 'watchlater'},
+ 'info_dict': {
+ 'id': r're:2_\d+',
+ 'title': '稍后再看',
+ 'uploader': str,
+ 'uploader_id': str,
+ },
'playlist_mincount': 0,
'skip': 'login required',
}, {
'url': 'https://www.bilibili.com/medialist/play/watchlater',
'info_dict': {'id': 'watchlater'},
'playlist_mincount': 0,
- 'skip': 'login required',
+ 'skip': 'redirect url & login required',
}]
def _extract_medialist(self, query, list_id):
for page_num in itertools.count(1):
page_data = self._download_json(
'https://api.bilibili.com/x/v2/medialist/resource/list',
- list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
+ list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
)['data']
yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
@@ -1407,7 +1574,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'tid': ('tid', {int_or_none}),
'sort_field': ('sortFiled', {int_or_none}),
'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
- })
+ }),
}
metadata = {
'id': f'{query["type"]}_{query["biz_id"]}',
@@ -1415,7 +1582,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'title': ('title', {str}),
'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}),
- 'timestamp': ('ctime', {int_or_none}),
+ 'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
'thumbnail': ('cover', {url_or_none}),
})),
}
@@ -1430,26 +1597,26 @@ class BilibiliCategoryIE(InfoExtractor):
'url': 'https://www.bilibili.com/v/kichiku/mad',
'info_dict': {
'id': 'kichiku: mad',
- 'title': 'kichiku: mad'
+ 'title': 'kichiku: mad',
},
'playlist_mincount': 45,
'params': {
- 'playlistend': 45
- }
+ 'playlistend': 45,
+ },
}]
def _fetch_page(self, api_url, num_pages, query, page_num):
parsed_json = self._download_json(
api_url, query, query={'Search_key': query, 'pn': page_num},
- note='Extracting results from page %s of %s' % (page_num, num_pages))
+ note=f'Extracting results from page {page_num} of {num_pages}')
video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
if not video_list:
- raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
+ raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
for video in video_list:
yield self.url_result(
- 'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
+ 'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid'])
def _entries(self, category, subcategory, query):
# map of categories : subcategories : RIDs
@@ -1459,7 +1626,7 @@ class BilibiliCategoryIE(InfoExtractor):
'manual_vocaloid': 126,
'guide': 22,
'theatre': 216,
- 'course': 127
+ 'course': 127,
},
}
@@ -1485,7 +1652,7 @@ class BilibiliCategoryIE(InfoExtractor):
def _real_extract(self, url):
category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
- query = '%s: %s' % (category, subcategory)
+ query = f'{category}: {subcategory}'
return self.playlist_result(self._entries(category, subcategory, query), query, query)
@@ -1588,7 +1755,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
formats = [{
'url': play_data['cdns'][0],
'filesize': int_or_none(play_data.get('size')),
- 'vcodec': 'none'
+ 'vcodec': 'none',
}]
for a_format in formats:
@@ -1606,7 +1773,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
subtitles = {
'origin': [{
'url': lyric,
- }]
+ }],
}
return {
@@ -1674,7 +1841,7 @@ class BiliBiliPlayerIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
return self.url_result(
- 'http://www.bilibili.tv/video/av%s/' % video_id,
+ f'http://www.bilibili.tv/video/av{video_id}/',
ie=BiliBiliIE.ie_key(), video_id=video_id)
@@ -1702,11 +1869,10 @@ class BiliIntlBaseIE(InfoExtractor):
return json.get('data')
def json2srt(self, json):
- data = '\n\n'.join(
+ return '\n\n'.join(
f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
for i, line in enumerate(traverse_obj(json, (
'body', lambda _, l: l['content'] and l['from'] and l['to']))))
- return data
def _get_subtitles(self, *, ep_id=None, aid=None):
sub_json = self._call_api(
@@ -1808,14 +1974,15 @@ class BiliIntlBaseIE(InfoExtractor):
note='Downloading login key', errnote='Unable to download login key')['data']
public_key = Cryptodome.RSA.importKey(key_data['key'])
- password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
+ password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
login_post = self._download_json(
- 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
+ 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
+ data=urlencode_postdata({
'username': username,
'password': base64.b64encode(password_hash).decode('ascii'),
'keep_me': 'true',
's_locale': 'en_US',
- 'isTrusted': 'true'
+ 'isTrusted': 'true',
}), note='Logging in', errnote='Unable to log in')
if login_post.get('code'):
if login_post.get('message'):
@@ -1842,17 +2009,17 @@ class BiliIntlIE(BiliIntlBaseIE):
'chapters': [{
'start_time': 0,
'end_time': 76.242,
- 'title': '<Untitled Chapter 1>'
+ 'title': '<Untitled Chapter 1>',
}, {
'start_time': 76.242,
'end_time': 161.161,
- 'title': 'Intro'
+ 'title': 'Intro',
}, {
'start_time': 1325.742,
'end_time': 1403.903,
- 'title': 'Outro'
+ 'title': 'Outro',
}],
- }
+ },
}, {
# Non-Bstation page
'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
@@ -1869,17 +2036,17 @@ class BiliIntlIE(BiliIntlBaseIE):
'chapters': [{
'start_time': 0,
'end_time': 88.0,
- 'title': '<Untitled Chapter 1>'
+ 'title': '<Untitled Chapter 1>',
}, {
'start_time': 88.0,
'end_time': 156.0,
- 'title': 'Intro'
+ 'title': 'Intro',
}, {
'start_time': 1173.0,
'end_time': 1259.535,
- 'title': 'Outro'
+ 'title': 'Outro',
}],
- }
+ },
}, {
# Subtitle with empty content
'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
@@ -1890,7 +2057,7 @@ class BiliIntlIE(BiliIntlBaseIE):
'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
'episode_number': 140,
},
- 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
+ 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
}, {
# episode comment extraction
'url': 'https://www.bilibili.tv/en/play/34580/340317',
@@ -1908,20 +2075,20 @@ class BiliIntlIE(BiliIntlBaseIE):
'chapters': [{
'start_time': 0,
'end_time': 61.0,
- 'title': '<Untitled Chapter 1>'
+ 'title': '<Untitled Chapter 1>',
}, {
'start_time': 61.0,
'end_time': 134.0,
- 'title': 'Intro'
+ 'title': 'Intro',
}, {
'start_time': 1290.0,
'end_time': 1379.0,
- 'title': 'Outro'
+ 'title': 'Outro',
}],
},
'params': {
- 'getcomments': True
- }
+ 'getcomments': True,
+ },
}, {
# user generated content comment extraction
'url': 'https://www.bilibili.tv/en/video/2045730385',
@@ -1936,8 +2103,8 @@ class BiliIntlIE(BiliIntlBaseIE):
'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
},
'params': {
- 'getcomments': True
- }
+ 'getcomments': True,
+ },
}, {
# episode id without intro and outro
'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
@@ -1992,7 +2159,7 @@ class BiliIntlIE(BiliIntlBaseIE):
# Non-Bstation layout, read through episode list
season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
video_data = traverse_obj(season_json, (
- 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
+ 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
), expected_type=dict, get_all=False)
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
@@ -2024,7 +2191,7 @@ class BiliIntlIE(BiliIntlBaseIE):
'id': replies.get('rpid'),
'like_count': int_or_none(replies.get('like_count')),
'parent': replies.get('parent'),
- 'timestamp': unified_timestamp(replies.get('ctime_text'))
+ 'timestamp': unified_timestamp(replies.get('ctime_text')),
}
if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
@@ -2077,11 +2244,11 @@ class BiliIntlIE(BiliIntlBaseIE):
chapters = [{
'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
- 'title': 'Intro'
+ 'title': 'Intro',
}, {
'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
- 'title': 'Outro'
+ 'title': 'Outro',
}]
return {
@@ -2137,12 +2304,13 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
episode_id = str(episode['episode_id'])
yield self.url_result(smuggle_url(
BiliIntlIE._make_url(episode_id, series_id),
- self._parse_video_metadata(episode)
+ self._parse_video_metadata(episode),
), BiliIntlIE, episode_id)
def _real_extract(self, url):
series_id = self._match_id(url)
- series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
+ series_info = self._call_api(
+ f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
return self.playlist_result(
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
@@ -2156,19 +2324,19 @@ class BiliLiveIE(InfoExtractor):
'url': 'https://live.bilibili.com/196',
'info_dict': {
'id': '33989',
- 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
+ 'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
'ext': 'flv',
- 'title': "太空狼人杀联动,不被爆杀就算赢",
- 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
+ 'title': '太空狼人杀联动,不被爆杀就算赢',
+ 'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
'timestamp': 1650802769,
},
- 'skip': 'not live'
+ 'skip': 'not live',
}, {
'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
- 'only_matching': True
+ 'only_matching': True,
}, {
'url': 'https://live.bilibili.com/blanc/196',
- 'only_matching': True
+ 'only_matching': True,
}]
_FORMATS = {
@@ -2209,7 +2377,7 @@ class BiliLiveIE(InfoExtractor):
raise ExtractorError('Streamer is not live', expected=True)
formats = []
- for qn in self._FORMATS.keys():
+ for qn in self._FORMATS:
stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
'room_id': room_id,
'qn': qn,