summaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/twitter.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--yt_dlp/extractor/twitter.py79
1 files changed, 38 insertions, 41 deletions
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index 1a11162..d056797 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -2,14 +2,10 @@ import functools
import json
import random
import re
+import urllib.parse
from .common import InfoExtractor
from .periscope import PeriscopeBaseIE, PeriscopeIE
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlparse,
-)
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
@@ -18,6 +14,7 @@ from ..utils import (
float_or_none,
format_field,
int_or_none,
+ join_nonempty,
make_archive_id,
remove_end,
str_or_none,
@@ -46,9 +43,9 @@ class TwitterBaseIE(InfoExtractor):
'flow_context': {
'debug_overrides': {},
'start_location': {
- 'location': 'unknown'
- }
- }
+ 'location': 'unknown',
+ },
+ },
},
'subtask_versions': {
'action_list': 2,
@@ -91,8 +88,8 @@ class TwitterBaseIE(InfoExtractor):
'user_recommendations_list': 4,
'user_recommendations_urt': 1,
'wait_spinner': 3,
- 'web_modal': 1
- }
+ 'web_modal': 1,
+ },
}, separators=(',', ':')).encode()
def _extract_variant_formats(self, variant, video_id):
@@ -111,7 +108,7 @@ class TwitterBaseIE(InfoExtractor):
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
f = {
'url': variant_url,
- 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
+ 'format_id': join_nonempty('http', tbr),
'tbr': tbr,
}
self._search_dimensions_in_video_url(f, variant_url)
@@ -126,7 +123,7 @@ class TwitterBaseIE(InfoExtractor):
subtitles = {}
urls = []
for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
- video_variant.attrib['url'] = compat_urllib_parse_unquote(
+ video_variant.attrib['url'] = urllib.parse.unquote(
video_variant.attrib['url'])
urls.append(video_variant.attrib['url'])
fmts, subs = self._extract_variant_formats(
@@ -218,7 +215,7 @@ class TwitterBaseIE(InfoExtractor):
def build_login_json(*subtask_inputs):
return json.dumps({
'flow_token': self._flow_token,
- 'subtask_inputs': subtask_inputs
+ 'subtask_inputs': subtask_inputs,
}, separators=(',', ':')).encode()
def input_dict(subtask_id, text):
@@ -226,8 +223,8 @@ class TwitterBaseIE(InfoExtractor):
'subtask_id': subtask_id,
'enter_text': {
'text': text,
- 'link': 'next_link'
- }
+ 'link': 'next_link',
+ },
}
next_subtask = self._call_login_api(
@@ -240,8 +237,8 @@ class TwitterBaseIE(InfoExtractor):
'subtask_id': next_subtask,
'js_instrumentation': {
'response': '{}',
- 'link': 'next_link'
- }
+ 'link': 'next_link',
+ },
}))
elif next_subtask == 'LoginEnterUserIdentifierSSO':
@@ -253,12 +250,12 @@ class TwitterBaseIE(InfoExtractor):
'key': 'user_identifier',
'response_data': {
'text_data': {
- 'result': username
- }
- }
+ 'result': username,
+ },
+ },
}],
- 'link': 'next_link'
- }
+ 'link': 'next_link',
+ },
}))
elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
@@ -273,8 +270,8 @@ class TwitterBaseIE(InfoExtractor):
'subtask_id': next_subtask,
'enter_password': {
'password': password,
- 'link': 'next_link'
- }
+ 'link': 'next_link',
+ },
}))
elif next_subtask == 'AccountDuplicationCheck':
@@ -282,8 +279,8 @@ class TwitterBaseIE(InfoExtractor):
'Submitting account duplication check', headers, data=build_login_json({
'subtask_id': next_subtask,
'check_logged_in_account': {
- 'link': 'AccountDuplicationCheck_false'
- }
+ 'link': 'AccountDuplicationCheck_false',
+ },
}))
elif next_subtask == 'LoginTwoFactorAuthChallenge':
@@ -317,7 +314,7 @@ class TwitterBaseIE(InfoExtractor):
'x-twitter-client-language': 'en',
'x-twitter-active-user': 'yes',
} if self.is_logged_in else {
- 'x-guest-token': self._fetch_guest_token(video_id)
+ 'x-guest-token': self._fetch_guest_token(video_id),
})
allowed_status = {400, 401, 403, 404} if graphql else {403}
result = self._download_json(
@@ -388,7 +385,7 @@ class TwitterCardIE(InfoExtractor):
'repost_count': int,
'tags': ['PlutoFlyby'],
},
- 'params': {'format': '[protocol=https]'}
+ 'params': {'format': '[protocol=https]'},
},
{
'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
@@ -1221,7 +1218,7 @@ class TwitterIE(TwitterBaseIE):
'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
'age_limit': 0,
'_old_archive_ids': ['twitter 1790637656616943991'],
- }
+ },
}, {
# onion route
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@@ -1374,11 +1371,11 @@ class TwitterIE(TwitterBaseIE):
'responsive_web_media_download_video_enabled': False,
'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
'responsive_web_graphql_timeline_navigation_enabled': True,
- 'responsive_web_enhance_cards_enabled': False
+ 'responsive_web_enhance_cards_enabled': False,
},
'fieldToggles': {
- 'withArticleRichContentState': False
- }
+ 'withArticleRichContentState': False,
+ },
}
def _call_syndication_api(self, twid):
@@ -1644,9 +1641,9 @@ class TwitterAmplifyIE(TwitterBaseIE):
def _find_dimension(target):
w = int_or_none(self._html_search_meta(
- 'twitter:%s:width' % target, webpage, fatal=False))
+ f'twitter:{target}:width', webpage, fatal=False))
h = int_or_none(self._html_search_meta(
- 'twitter:%s:height' % target, webpage, fatal=False))
+ f'twitter:{target}:height', webpage, fatal=False))
return w, h
if thumbnail:
@@ -1740,7 +1737,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
if '/live_video_stream/geoblocked/' in m3u8_url:
self.raise_geo_restricted()
- m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
+ m3u8_id = urllib.parse.parse_qs(urllib.parse.urlparse(
m3u8_url).query).get('type', [None])[0]
state, width, height = self._extract_common_format_info(broadcast)
info['formats'] = self._extract_pscp_m3u8_formats(
@@ -1895,12 +1892,12 @@ class TwitterShortenerIE(TwitterBaseIE):
def _real_extract(self, url):
mobj = self._match_valid_url(url)
- eid, id = mobj.group('eid', 'id')
+ eid, shortcode = mobj.group('eid', 'id')
if eid:
- id = eid
- url = self._BASE_URL + id
- new_url = self._request_webpage(url, id, headers={'User-Agent': 'curl'}).url
- __UNSAFE_LINK = "https://twitter.com/safety/unsafe_link_warning?unsafe_link="
+ shortcode = eid
+ url = self._BASE_URL + shortcode
+ new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).url
+ __UNSAFE_LINK = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
if new_url.startswith(__UNSAFE_LINK):
- new_url = new_url.replace(__UNSAFE_LINK, "")
+ new_url = new_url.replace(__UNSAFE_LINK, '')
return self.url_result(new_url)