summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp/extractor/tiktok.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor/tiktok.py')
-rw-r--r-- yt_dlp/extractor/tiktok.py | 89
1 files changed, 53 insertions, 36 deletions
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 7bcfded..c3505b1 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -5,10 +5,10 @@ import random
import re
import string
import time
+import urllib.parse
import uuid
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlparse
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
@@ -30,6 +30,7 @@ from ..utils import (
try_call,
try_get,
url_or_none,
+ urlencode_postdata,
)
@@ -43,8 +44,8 @@ class TikTokBaseIE(InfoExtractor):
'iid': None,
# TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
'app_name': 'musical_ly',
- 'app_version': '34.1.2',
- 'manifest_app_version': '2023401020',
+ 'app_version': '35.1.3',
+ 'manifest_app_version': '2023501030',
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
'aid': '0',
}
@@ -114,18 +115,19 @@ class TikTokBaseIE(InfoExtractor):
'universal data', display_id, end_pattern=r'</script>', default={}),
('__DEFAULT_SCOPE__', {dict})) or {}
- def _call_api_impl(self, ep, query, video_id, fatal=True,
+ def _call_api_impl(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
if webpage_cookies.get('sid_tt'):
self._set_cookie(self._API_HOSTNAME, 'sid_tt', webpage_cookies['sid_tt'].value)
return self._download_json(
- 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
+ f'https://{self._API_HOSTNAME}/aweme/v1/{ep}/', video_id=video_id,
fatal=fatal, note=note, errnote=errnote, headers={
'User-Agent': self._APP_USER_AGENT,
'Accept': 'application/json',
- }, query=query)
+ **(headers or {}),
+ }, query=query, data=data)
def _build_api_query(self, query):
return filter_dict({
@@ -138,7 +140,7 @@ class TikTokBaseIE(InfoExtractor):
'channel': 'googleplay',
'aid': self._APP_INFO['aid'],
'app_name': self._APP_INFO['app_name'],
- 'version_code': ''.join((f'{int(v):02d}' for v in self._APP_INFO['app_version'].split('.'))),
+ 'version_code': ''.join(f'{int(v):02d}' for v in self._APP_INFO['app_version'].split('.')),
'version_name': self._APP_INFO['app_version'],
'manifest_version_code': self._APP_INFO['manifest_app_version'],
'update_version_code': self._APP_INFO['manifest_app_version'],
@@ -174,7 +176,7 @@ class TikTokBaseIE(InfoExtractor):
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
})
- def _call_api(self, ep, query, video_id, fatal=True,
+ def _call_api(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
if not self._APP_INFO and not self._get_next_app_info():
message = 'No working app info is available'
@@ -187,9 +189,11 @@ class TikTokBaseIE(InfoExtractor):
max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
for count in itertools.count(1):
self.write_debug(str(self._APP_INFO))
- real_query = self._build_api_query(query)
+ real_query = self._build_api_query(query or {})
try:
- return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote)
+ return self._call_api_impl(
+ ep, video_id, query=real_query, data=data, headers=headers,
+ fatal=fatal, note=note, errnote=errnote)
except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
message = str(e.cause or e.msg)
@@ -204,17 +208,29 @@ class TikTokBaseIE(InfoExtractor):
raise
def _extract_aweme_app(self, aweme_id):
- feed_list = self._call_api(
- 'feed', {'aweme_id': aweme_id}, aweme_id, note='Downloading video feed',
- errnote='Unable to download video feed').get('aweme_list') or []
- aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None)
+ aweme_detail = traverse_obj(
+ self._call_api('multi/aweme/detail', aweme_id, data=urlencode_postdata({
+ 'aweme_ids': f'[{aweme_id}]',
+ 'request_source': '0',
+ }), headers={'X-Argus': ''}), ('aweme_details', 0, {dict}))
if not aweme_detail:
- raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
+ raise ExtractorError('Unable to extract aweme detail info', video_id=aweme_id)
return self._parse_aweme_video_app(aweme_detail)
def _extract_web_data_and_status(self, url, video_id, fatal=True):
- webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=fatal) or ''
- video_data, status = {}, None
+ video_data, status = {}, -1
+
+ res = self._download_webpage_handle(url, video_id, fatal=fatal, headers={'User-Agent': 'Mozilla/5.0'})
+ if res is False:
+ return video_data, status
+
+ webpage, urlh = res
+ if urllib.parse.urlparse(urlh.url).path == '/login':
+ message = 'TikTok is requiring login for access to this content'
+ if fatal:
+ self.raise_login_required(message)
+ self.report_warning(f'{message}. {self._login_hint()}')
+ return video_data, status
if universal_data := self._get_universal_data(webpage, video_id):
self.write_debug('Found universal data for rehydration')
@@ -254,7 +270,7 @@ class TikTokBaseIE(InfoExtractor):
'ext': 'srt',
'data': '\n\n'.join(
f'{i + 1}\n{srt_subtitles_timecode(line["start_time"] / 1000)} --> {srt_subtitles_timecode(line["end_time"] / 1000)}\n{line["text"]}'
- for i, line in enumerate(caption_json['utterances']) if line.get('text'))
+ for i, line in enumerate(caption_json['utterances']) if line.get('text')),
})
# feed endpoint subs
if not subtitles:
@@ -382,7 +398,7 @@ class TikTokBaseIE(InfoExtractor):
auth_cookie = self._get_cookies(self._WEBPAGE_HOST).get('sid_tt')
if auth_cookie:
for f in formats:
- self._set_cookie(compat_urllib_parse_urlparse(f['url']).hostname, 'sid_tt', auth_cookie.value)
+ self._set_cookie(urllib.parse.urlparse(f['url']).hostname, 'sid_tt', auth_cookie.value)
thumbnails = []
for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
@@ -402,7 +418,7 @@ class TikTokBaseIE(InfoExtractor):
contained_music_author = traverse_obj(
music_info, ('matched_song', 'author'), ('matched_pgc_sound', 'author'), 'author', expected_type=str)
- is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - %s' % music_info.get('owner_handle')
+ is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - {}'.format(music_info.get('owner_handle'))
if is_generic_og_trackname:
music_track, music_author = contained_music_track or 'original sound', contained_music_author
else:
@@ -792,7 +808,7 @@ class TikTokIE(TikTokBaseIE):
'expected_warnings': ['Unable to find video in feed'],
}, {
# 1080p format
- 'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830', # FIXME
+ 'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830', # FIXME: Web can only get audio
'md5': '982512017a8a917124d5a08c8ae79621',
'info_dict': {
'id': '7107337212743830830',
@@ -846,7 +862,7 @@ class TikTokIE(TikTokBaseIE):
}, {
# Auto-captions available
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
- 'only_matching': True
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -1026,7 +1042,8 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul
for retry in self.RetryManager():
try:
post_list = self._call_api(
- self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}',
+ self._API_ENDPOINT, display_id, query=query,
+ note=f'Downloading video list page {page}',
errnote='Unable to download video list')
except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
@@ -1059,17 +1076,17 @@ class TikTokSoundIE(TikTokBaseListIE):
'url': 'https://www.tiktok.com/music/Build-a-Btch-6956990112127585029?lang=en',
'playlist_mincount': 100,
'info_dict': {
- 'id': '6956990112127585029'
+ 'id': '6956990112127585029',
},
- 'expected_warnings': ['Retrying']
+ 'expected_warnings': ['Retrying'],
}, {
# Actual entries are less than listed video count
'url': 'https://www.tiktok.com/music/jiefei-soap-remix-7036843036118469381',
'playlist_mincount': 2182,
'info_dict': {
- 'id': '7036843036118469381'
+ 'id': '7036843036118469381',
},
- 'expected_warnings': ['Retrying']
+ 'expected_warnings': ['Retrying'],
}]
@@ -1085,11 +1102,11 @@ class TikTokEffectIE(TikTokBaseListIE):
'info_dict': {
'id': '1258156',
},
- 'expected_warnings': ['Retrying']
+ 'expected_warnings': ['Retrying'],
}, {
# Different entries between mobile and web, depending on region
'url': 'https://www.tiktok.com/sticker/Elf-Friend-479565',
- 'only_matching': True
+ 'only_matching': True,
}]
@@ -1106,16 +1123,16 @@ class TikTokTagIE(TikTokBaseListIE):
'id': '46294678',
'title': 'hello2018',
},
- 'expected_warnings': ['Retrying']
+ 'expected_warnings': ['Retrying'],
}, {
'url': 'https://tiktok.com/tag/fypシ?is_copy_url=0&is_from_webapp=v1',
- 'only_matching': True
+ 'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id, headers={
- 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
+ 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
})
tag_id = self._html_search_regex(r'snssdk\d*://challenge/detail/(\d+)', webpage, 'tag ID')
return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id)
@@ -1129,17 +1146,17 @@ class TikTokCollectionIE(TikTokBaseIE):
'url': 'https://www.tiktok.com/@imanoreotwe/collection/count-test-7371330159376370462',
'info_dict': {
'id': '7371330159376370462',
- 'title': 'imanoreotwe-count-test'
+ 'title': 'imanoreotwe-count-test',
},
- 'playlist_count': 9
+ 'playlist_count': 9,
}, {
# tests returning multiple pages of a large collection
'url': 'https://www.tiktok.com/@imanoreotwe/collection/%F0%9F%98%82-7111887189571160875',
'info_dict': {
'id': '7111887189571160875',
- 'title': 'imanoreotwe-%F0%9F%98%82'
+ 'title': 'imanoreotwe-%F0%9F%98%82',
},
- 'playlist_mincount': 100
+ 'playlist_mincount': 100,
}]
_API_BASE_URL = 'https://www.tiktok.com/api/collection/item_list/'
_PAGE_COUNT = 30