diff options
Diffstat (limited to 'yt_dlp/extractor/twitter.py')
-rw-r--r-- | yt_dlp/extractor/twitter.py | 79 |
1 file changed, 38 insertions, 41 deletions
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 1a11162..d056797 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -2,14 +2,10 @@ import functools import json import random import re +import urllib.parse from .common import InfoExtractor from .periscope import PeriscopeBaseIE, PeriscopeIE -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_unquote, - compat_urllib_parse_urlparse, -) from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -18,6 +14,7 @@ from ..utils import ( float_or_none, format_field, int_or_none, + join_nonempty, make_archive_id, remove_end, str_or_none, @@ -46,9 +43,9 @@ class TwitterBaseIE(InfoExtractor): 'flow_context': { 'debug_overrides': {}, 'start_location': { - 'location': 'unknown' - } - } + 'location': 'unknown', + }, + }, }, 'subtask_versions': { 'action_list': 2, @@ -91,8 +88,8 @@ class TwitterBaseIE(InfoExtractor): 'user_recommendations_list': 4, 'user_recommendations_urt': 1, 'wait_spinner': 3, - 'web_modal': 1 - } + 'web_modal': 1, + }, }, separators=(',', ':')).encode() def _extract_variant_formats(self, variant, video_id): @@ -111,7 +108,7 @@ class TwitterBaseIE(InfoExtractor): tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None f = { 'url': variant_url, - 'format_id': 'http' + ('-%d' % tbr if tbr else ''), + 'format_id': join_nonempty('http', tbr), 'tbr': tbr, } self._search_dimensions_in_video_url(f, variant_url) @@ -126,7 +123,7 @@ class TwitterBaseIE(InfoExtractor): subtitles = {} urls = [] for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'): - video_variant.attrib['url'] = compat_urllib_parse_unquote( + video_variant.attrib['url'] = urllib.parse.unquote( video_variant.attrib['url']) urls.append(video_variant.attrib['url']) fmts, subs = self._extract_variant_formats( @@ -218,7 +215,7 @@ class TwitterBaseIE(InfoExtractor): def 
build_login_json(*subtask_inputs): return json.dumps({ 'flow_token': self._flow_token, - 'subtask_inputs': subtask_inputs + 'subtask_inputs': subtask_inputs, }, separators=(',', ':')).encode() def input_dict(subtask_id, text): @@ -226,8 +223,8 @@ class TwitterBaseIE(InfoExtractor): 'subtask_id': subtask_id, 'enter_text': { 'text': text, - 'link': 'next_link' - } + 'link': 'next_link', + }, } next_subtask = self._call_login_api( @@ -240,8 +237,8 @@ class TwitterBaseIE(InfoExtractor): 'subtask_id': next_subtask, 'js_instrumentation': { 'response': '{}', - 'link': 'next_link' - } + 'link': 'next_link', + }, })) elif next_subtask == 'LoginEnterUserIdentifierSSO': @@ -253,12 +250,12 @@ class TwitterBaseIE(InfoExtractor): 'key': 'user_identifier', 'response_data': { 'text_data': { - 'result': username - } - } + 'result': username, + }, + }, }], - 'link': 'next_link' - } + 'link': 'next_link', + }, })) elif next_subtask == 'LoginEnterAlternateIdentifierSubtask': @@ -273,8 +270,8 @@ class TwitterBaseIE(InfoExtractor): 'subtask_id': next_subtask, 'enter_password': { 'password': password, - 'link': 'next_link' - } + 'link': 'next_link', + }, })) elif next_subtask == 'AccountDuplicationCheck': @@ -282,8 +279,8 @@ class TwitterBaseIE(InfoExtractor): 'Submitting account duplication check', headers, data=build_login_json({ 'subtask_id': next_subtask, 'check_logged_in_account': { - 'link': 'AccountDuplicationCheck_false' - } + 'link': 'AccountDuplicationCheck_false', + }, })) elif next_subtask == 'LoginTwoFactorAuthChallenge': @@ -317,7 +314,7 @@ class TwitterBaseIE(InfoExtractor): 'x-twitter-client-language': 'en', 'x-twitter-active-user': 'yes', } if self.is_logged_in else { - 'x-guest-token': self._fetch_guest_token(video_id) + 'x-guest-token': self._fetch_guest_token(video_id), }) allowed_status = {400, 401, 403, 404} if graphql else {403} result = self._download_json( @@ -388,7 +385,7 @@ class TwitterCardIE(InfoExtractor): 'repost_count': int, 'tags': ['PlutoFlyby'], }, - 
'params': {'format': '[protocol=https]'} + 'params': {'format': '[protocol=https]'}, }, { 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', @@ -1221,7 +1218,7 @@ class TwitterIE(TwitterBaseIE): 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+', 'age_limit': 0, '_old_archive_ids': ['twitter 1790637656616943991'], - } + }, }, { # onion route 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273', @@ -1374,11 +1371,11 @@ class TwitterIE(TwitterBaseIE): 'responsive_web_media_download_video_enabled': False, 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False, 'responsive_web_graphql_timeline_navigation_enabled': True, - 'responsive_web_enhance_cards_enabled': False + 'responsive_web_enhance_cards_enabled': False, }, 'fieldToggles': { - 'withArticleRichContentState': False - } + 'withArticleRichContentState': False, + }, } def _call_syndication_api(self, twid): @@ -1644,9 +1641,9 @@ class TwitterAmplifyIE(TwitterBaseIE): def _find_dimension(target): w = int_or_none(self._html_search_meta( - 'twitter:%s:width' % target, webpage, fatal=False)) + f'twitter:{target}:width', webpage, fatal=False)) h = int_or_none(self._html_search_meta( - 'twitter:%s:height' % target, webpage, fatal=False)) + f'twitter:{target}:height', webpage, fatal=False)) return w, h if thumbnail: @@ -1740,7 +1737,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): m3u8_url = source.get('noRedirectPlaybackUrl') or source['location'] if '/live_video_stream/geoblocked/' in m3u8_url: self.raise_geo_restricted() - m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse( + m3u8_id = urllib.parse.parse_qs(urllib.parse.urlparse( m3u8_url).query).get('type', [None])[0] state, width, height = self._extract_common_format_info(broadcast) info['formats'] = self._extract_pscp_m3u8_formats( @@ -1895,12 +1892,12 @@ class TwitterShortenerIE(TwitterBaseIE): def _real_extract(self, url): mobj = 
self._match_valid_url(url) - eid, id = mobj.group('eid', 'id') + eid, shortcode = mobj.group('eid', 'id') if eid: - id = eid - url = self._BASE_URL + id - new_url = self._request_webpage(url, id, headers={'User-Agent': 'curl'}).url - __UNSAFE_LINK = "https://twitter.com/safety/unsafe_link_warning?unsafe_link=" + shortcode = eid + url = self._BASE_URL + shortcode + new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).url + __UNSAFE_LINK = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link=' if new_url.startswith(__UNSAFE_LINK): - new_url = new_url.replace(__UNSAFE_LINK, "") + new_url = new_url.replace(__UNSAFE_LINK, '') return self.url_result(new_url) |