summary refs log tree commit diff stats
path: root/yt_dlp/extractor/tiktok.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- yt_dlp/extractor/tiktok.py 19
1 files changed, 16 insertions, 3 deletions
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index aa1dcec..9d823a3 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -23,7 +23,6 @@ from ..utils import (
mimetype2ext,
parse_qs,
qualities,
- remove_start,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
@@ -254,7 +253,16 @@ class TikTokBaseIE(InfoExtractor):
def _get_subtitles(self, aweme_detail, aweme_id, user_name):
# TODO: Extract text positioning info
+
+ EXT_MAP = { # From lowest to highest preference
+ 'creator_caption': 'json',
+ 'srt': 'srt',
+ 'webvtt': 'vtt',
+ }
+ preference = qualities(tuple(EXT_MAP.values()))
+
subtitles = {}
+
# aweme/detail endpoint subs
captions_info = traverse_obj(
aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
@@ -278,8 +286,8 @@ class TikTokBaseIE(InfoExtractor):
if not caption.get('url'):
continue
subtitles.setdefault(caption.get('lang') or 'en', []).append({
- 'ext': remove_start(caption.get('caption_format'), 'web'),
'url': caption['url'],
+ 'ext': EXT_MAP.get(caption.get('Format')),
})
# webpage subs
if not subtitles:
@@ -288,9 +296,14 @@ class TikTokBaseIE(InfoExtractor):
self._create_url(user_name, aweme_id), aweme_id, fatal=False)
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
- 'ext': remove_start(caption.get('Format'), 'web'),
'url': caption['Url'],
+ 'ext': EXT_MAP.get(caption.get('Format')),
})
+
+ # Deprioritize creator_caption json since it can't be embedded or used by media players
+ for lang, subs_list in subtitles.items():
+ subtitles[lang] = sorted(subs_list, key=lambda x: preference(x['ext']))
+
return subtitles
def _parse_url_key(self, url_key):