summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp/extractor/tiktok.py
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-05 09:07:33 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-05 09:07:33 +0000
commit: 1719c758f7efdca9476d2a674b659191dee6344f (patch)
tree: d378103d8967450628b7254d9ec1f4f91993bd04 /yt_dlp/extractor/tiktok.py
parent: Adding debian version 2024.07.16-1. (diff)
download: yt-dlp-1719c758f7efdca9476d2a674b659191dee6344f.tar.xz
download: yt-dlp-1719c758f7efdca9476d2a674b659191dee6344f.zip
Merging upstream version 2024.07.25.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'yt_dlp/extractor/tiktok.py')
-rw-r--r-- yt_dlp/extractor/tiktok.py | 19
1 files changed, 16 insertions, 3 deletions
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index aa1dcec..9d823a3 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -23,7 +23,6 @@ from ..utils import (
mimetype2ext,
parse_qs,
qualities,
- remove_start,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
@@ -254,7 +253,16 @@ class TikTokBaseIE(InfoExtractor):
def _get_subtitles(self, aweme_detail, aweme_id, user_name):
# TODO: Extract text positioning info
+
+ EXT_MAP = { # From lowest to highest preference
+ 'creator_caption': 'json',
+ 'srt': 'srt',
+ 'webvtt': 'vtt',
+ }
+ preference = qualities(tuple(EXT_MAP.values()))
+
subtitles = {}
+
# aweme/detail endpoint subs
captions_info = traverse_obj(
aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
@@ -278,8 +286,8 @@ class TikTokBaseIE(InfoExtractor):
if not caption.get('url'):
continue
subtitles.setdefault(caption.get('lang') or 'en', []).append({
- 'ext': remove_start(caption.get('caption_format'), 'web'),
'url': caption['url'],
+ 'ext': EXT_MAP.get(caption.get('caption_format')),
})
# webpage subs
if not subtitles:
@@ -288,9 +296,14 @@ class TikTokBaseIE(InfoExtractor):
self._create_url(user_name, aweme_id), aweme_id, fatal=False)
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
- 'ext': remove_start(caption.get('Format'), 'web'),
'url': caption['Url'],
+ 'ext': EXT_MAP.get(caption.get('Format')),
})
+
+ # Deprioritize creator_caption json since it can't be embedded or used by media players
+ for lang, subs_list in subtitles.items():
+ subtitles[lang] = sorted(subs_list, key=lambda x: preference(x['ext']))
+
return subtitles
def _parse_url_key(self, url_key):