diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-05 09:06:11 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-05 09:06:11 +0000 |
commit | fd5a06560caab95c71a2e2e805efa8d0f3a696a0 (patch) | |
tree | e1c600b8612bc4b301e2f51b875fcd835c5008cc /yt_dlp | |
parent | Releasing progress-linux version 2024.05.27-1~progress7.99u1. (diff) | |
download | yt-dlp-fd5a06560caab95c71a2e2e805efa8d0f3a696a0.tar.xz yt-dlp-fd5a06560caab95c71a2e2e805efa8d0f3a696a0.zip |
Merging upstream version 2024.07.01.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'yt_dlp')
889 files changed, 9055 insertions, 8001 deletions
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2c6f695..e56c3ed 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -4,6 +4,7 @@ import copy import datetime as dt import errno import fileinput +import functools import http.cookiejar import io import itertools @@ -24,7 +25,7 @@ import traceback import unicodedata from .cache import Cache -from .compat import functools, urllib # isort: split +from .compat import urllib # isort: split from .compat import compat_os_name, urllib_req_to_req from .cookies import LenientSimpleCookie, load_cookies from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name @@ -109,7 +110,6 @@ from .utils import ( determine_protocol, encode_compat_str, encodeFilename, - error_to_compat_str, escapeHTML, expand_path, extract_basic_auth, @@ -159,7 +159,7 @@ from .utils import ( write_json_file, write_string, ) -from .utils._utils import _YDLLogger +from .utils._utils import _UnsafeExtensionError, _YDLLogger from .utils.networking import ( HTTPHeaderDict, clean_headers, @@ -172,6 +172,20 @@ if compat_os_name == 'nt': import ctypes +def _catch_unsafe_extension_error(func): + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + try: + return func(self, *args, **kwargs) + except _UnsafeExtensionError as error: + self.report_error( + f'The extracted extension ({error.extension!r}) is unusual ' + 'and will be skipped for safety reasons. ' + f'If you believe this is an error{bug_reports_message(",")}') + + return wrapper + + class YoutubeDL: """YoutubeDL class. @@ -454,8 +468,9 @@ class YoutubeDL: Set the value to 'native' to use the native downloader compat_opts: Compatibility options. See "Differences in default behavior". The following options do not work when used through the API: - filename, abort-on-error, multistreams, no-live-chat, format-sort - no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json. 
+ filename, abort-on-error, multistreams, no-live-chat, + format-sort, no-clean-infojson, no-playlist-metafiles, + no-keep-subs, no-attach-info-json, allow-unsafe-ext. Refer __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. Allowed keys are 'download', 'postprocess', @@ -582,8 +597,9 @@ class YoutubeDL: 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data', 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies', - 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', - 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' + 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url', + 'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', + 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time', } _deprecated_multivalue_fields = { 'album_artist': 'album_artists', @@ -594,7 +610,7 @@ class YoutubeDL: } _format_selection_exts = { 'audio': set(MEDIA_EXTENSIONS.common_audio), - 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )), + 'video': {*MEDIA_EXTENSIONS.common_video, '3gp'}, 'storyboards': set(MEDIA_EXTENSIONS.storyboards), } @@ -628,7 +644,7 @@ class YoutubeDL: error=sys.stderr, screen=sys.stderr if self.params.get('quiet') else stdout, console=None if compat_os_name == 'nt' else next( - filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) + filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), ) try: @@ -679,9 +695,9 @@ class YoutubeDL: width_args = [] if width is None else ['-w', str(width)] sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': 
self._out_files.error} try: - self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) + self._output_process = Popen(['bidiv', *width_args], **sp_kwargs) except OSError: - self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) + self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == errno.ENOENT: @@ -822,8 +838,7 @@ class YoutubeDL: ) self.report_warning( 'Long argument string detected. ' - 'Use -- to separate parameters and URLs, like this:\n%s' % - shell_quote(correct_argv)) + f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}') def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" @@ -922,7 +937,7 @@ class YoutubeDL: if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'): return self._write_string( - '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), + '{}{}'.format(self._bidi_workaround(message), ('' if skip_eol else '\n')), self._out_files.screen, only_once=only_once) def to_stderr(self, message, only_once=False): @@ -1045,10 +1060,10 @@ class YoutubeDL: return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs) def report_warning(self, message, only_once=False): - ''' + """ Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored - ''' + """ if self.params.get('logger') is not None: self.params['logger'].warning(message) else: @@ -1066,14 +1081,14 @@ class YoutubeDL: self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True) def report_error(self, message, *args, **kwargs): - ''' + """ Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. 
- ''' + """ self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs) def write_debug(self, message, only_once=False): - '''Log debug message or Print message to stderr''' + """Log debug message or Print message to stderr""" if not self.params.get('verbose', False): return message = f'[debug] {message}' @@ -1085,14 +1100,14 @@ class YoutubeDL: def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: - self.to_screen('[download] %s has already been downloaded' % file_name) + self.to_screen(f'[download] {file_name} has already been downloaded') except UnicodeEncodeError: self.to_screen('[download] The file has already been downloaded') def report_file_delete(self, file_name): """Report that existing file will be deleted.""" try: - self.to_screen('Deleting existing file %s' % file_name) + self.to_screen(f'Deleting existing file {file_name}') except UnicodeEncodeError: self.to_screen('Deleting existing file') @@ -1147,7 +1162,7 @@ class YoutubeDL: @staticmethod def escape_outtmpl(outtmpl): - ''' Escape any remaining strings like %s, %abc% etc. ''' + """ Escape any remaining strings like %s, %abc% etc. """ return re.sub( STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'), lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0), @@ -1155,7 +1170,7 @@ class YoutubeDL: @classmethod def validate_outtmpl(cls, outtmpl): - ''' @return None or Exception object ''' + """ @return None or Exception object """ outtmpl = re.sub( STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'), lambda mobj: f'{mobj.group(0)[:-1]}s', @@ -1208,13 +1223,13 @@ class YoutubeDL: } # Field is of the form key1.key2... 
# where keys (except first) can be string, int, slice or "{field, ...}" - FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} - FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { + FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} # noqa: UP031 + FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { # noqa: UP031 'inner': FIELD_INNER_RE, - 'field': rf'\w*(?:\.{FIELD_INNER_RE})*' + 'field': rf'\w*(?:\.{FIELD_INNER_RE})*', } MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})' - MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys())) + MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys()))) INTERNAL_FORMAT_RE = re.compile(rf'''(?xs) (?P<negate>-)? (?P<fields>{FIELD_RE}) @@ -1337,7 +1352,7 @@ class YoutubeDL: value, default = None, na fmt = outer_mobj.group('format') - if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int): + if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int): fmt = f'0{field_size_compat_map[last_field]:d}d' flags = outer_mobj.group('conversion') or '' @@ -1362,7 +1377,7 @@ class YoutubeDL: elif fmt[-1] == 'U': # unicode normalized value, fmt = unicodedata.normalize( # "+" = compatibility equivalence, "#" = NFD - 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), + 'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), value), str_fmt elif fmt[-1] == 'D': # decimal suffix num_fmt, fmt = fmt[:-1].replace('#', ''), 's' @@ -1390,7 +1405,7 @@ class YoutubeDL: if fmt[-1] in 'csra': value = sanitizer(last_field, value) - key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format')) + key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format')) TMPL_DICT[key] = value return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix')) @@ -1400,6 +1415,7 @@ 
class YoutubeDL: outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) return self.escape_outtmpl(outtmpl) % info_dict + @_catch_unsafe_extension_error def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None): assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive' if outtmpl is None: @@ -1479,9 +1495,9 @@ class YoutubeDL: date = info_dict.get('upload_date') if date is not None: - dateRange = self.params.get('daterange', DateRange()) - if date not in dateRange: - return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}' + date_range = self.params.get('daterange', DateRange()) + if date not in date_range: + return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}' view_count = info_dict.get('view_count') if view_count is not None: min_views = self.params.get('min_views') @@ -1491,7 +1507,7 @@ class YoutubeDL: if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): - return 'Skipping "%s" because it is age restricted' % video_title + return f'Skipping "{video_title}" because it is age restricted' match_filter = self.params.get('match_filter') if match_filter is None: @@ -1544,7 +1560,7 @@ class YoutubeDL: @staticmethod def add_extra_info(info_dict, extra_info): - '''Set the keys from extra_info in info dict if they are missing''' + """Set the keys from extra_info in info dict if they are missing""" for key, value in extra_info.items(): info_dict.setdefault(key, value) @@ -1590,7 +1606,7 @@ class YoutubeDL: self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: ' 'has already been recorded in the archive') if self.params.get('break_on_existing', False): - raise ExistingVideoReached() + raise ExistingVideoReached break return 
self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process) else: @@ -1616,8 +1632,8 @@ class YoutubeDL: except GeoRestrictedError as e: msg = e.msg if e.countries: - msg += '\nThis video is available in %s.' % ', '.join( - map(ISO3166Utils.short2full, e.countries)) + msg += '\nThis video is available in {}.'.format(', '.join( + map(ISO3166Utils.short2full, e.countries))) msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' self.report_error(msg) except ExtractorError as e: # An error we somewhat expected @@ -1826,8 +1842,8 @@ class YoutubeDL: if isinstance(additional_urls, str): additional_urls = [additional_urls] self.to_screen( - '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls))) - self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls)) + '[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls))) + self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls))) ie_result['additional_entries'] = [ self.extract_info( url, download, extra_info=extra_info, @@ -1879,8 +1895,8 @@ class YoutubeDL: webpage_url = ie_result.get('webpage_url') # Playlists maynot have webpage_url if webpage_url and webpage_url in self._playlist_urls: self.to_screen( - '[download] Skipping already downloaded playlist: %s' - % ie_result.get('title') or ie_result.get('id')) + '[download] Skipping already downloaded playlist: {}'.format( + ie_result.get('title')) or ie_result.get('id')) return self._playlist_level += 1 @@ -1895,8 +1911,8 @@ class YoutubeDL: self._playlist_urls.clear() elif result_type == 'compat_list': self.report_warning( - 'Extractor %s returned a compat_list result. ' - 'It needs to be updated.' % ie_result.get('extractor')) + 'Extractor {} returned a compat_list result. 
' + 'It needs to be updated.'.format(ie_result.get('extractor'))) def _fixup(r): self.add_extra_info(r, { @@ -1913,7 +1929,7 @@ class YoutubeDL: ] return ie_result else: - raise Exception('Invalid result type: %s' % result_type) + raise Exception(f'Invalid result type: {result_type}') def _ensure_dir_exists(self, path): return make_dir(path, self.report_error) @@ -1927,6 +1943,8 @@ class YoutubeDL: 'playlist_title': ie_result.get('title'), 'playlist_uploader': ie_result.get('uploader'), 'playlist_uploader_id': ie_result.get('uploader_id'), + 'playlist_channel': ie_result.get('channel'), + 'playlist_channel_id': ie_result.get('channel_id'), **kwargs, } if strict: @@ -2029,8 +2047,9 @@ class YoutubeDL: resolved_entries[i] = (playlist_index, NO_DEFAULT) continue - self.to_screen('[download] Downloading item %s of %s' % ( - self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) + self.to_screen( + f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} ' + f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}') entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({ 'playlist_index': playlist_index, @@ -2080,9 +2099,9 @@ class YoutubeDL: } operator_rex = re.compile(r'''(?x)\s* (?P<key>[\w.-]+)\s* - (?P<op>%s)(?P<none_inclusive>\s*\?)?\s* + (?P<op>{})(?P<none_inclusive>\s*\?)?\s* (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s* - ''' % '|'.join(map(re.escape, OPERATORS.keys()))) + '''.format('|'.join(map(re.escape, OPERATORS.keys())))) m = operator_rex.fullmatch(filter_spec) if m: try: @@ -2093,7 +2112,7 @@ class YoutubeDL: comparison_value = parse_filesize(m.group('value') + 'B') if comparison_value is None: raise ValueError( - 'Invalid value %r in format specification %r' % ( + 'Invalid value {!r} in format specification {!r}'.format( m.group('value'), filter_spec)) op = OPERATORS[m.group('op')] @@ -2103,15 +2122,15 @@ class YoutubeDL: '^=': lambda attr, value: 
attr.startswith(value), '$=': lambda attr, value: attr.endswith(value), '*=': lambda attr, value: value in attr, - '~=': lambda attr, value: value.search(attr) is not None + '~=': lambda attr, value: value.search(attr) is not None, } str_operator_rex = re.compile(r'''(?x)\s* (?P<key>[a-zA-Z0-9._-]+)\s* - (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)? + (?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)? (?P<quote>["'])? (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+)) (?(quote)(?P=quote))\s* - ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) + '''.format('|'.join(map(re.escape, STR_OPERATORS.keys())))) m = str_operator_rex.fullmatch(filter_spec) if m: if m.group('op') == '~=': @@ -2125,7 +2144,7 @@ class YoutubeDL: op = str_op if not m: - raise SyntaxError('Invalid filter specification %r' % filter_spec) + raise SyntaxError(f'Invalid filter specification {filter_spec!r}') def _filter(f): actual_value = f.get(m.group('key')) @@ -2141,7 +2160,7 @@ class YoutubeDL: if working: yield f continue - self.to_screen('[info] Testing format %s' % f['format_id']) + self.to_screen('[info] Testing format {}'.format(f['format_id'])) path = self.get_output_path('temp') if not self._ensure_dir_exists(f'{path}/'): continue @@ -2149,19 +2168,19 @@ class YoutubeDL: temp_file.close() try: success, _ = self.dl(temp_file.name, f, test=True) - except (DownloadError, OSError, ValueError) + network_exceptions: + except (DownloadError, OSError, ValueError, *network_exceptions): success = False finally: if os.path.exists(temp_file.name): try: os.remove(temp_file.name) except OSError: - self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) + self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') f['__working'] = success if success: yield f else: - self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id']) + self.to_screen('[info] Unable to download format {}. 
Skipping...'.format(f['format_id'])) def _select_formats(self, formats, selector): return list(selector({ @@ -2214,8 +2233,8 @@ class YoutubeDL: def _parse_filter(tokens): filter_parts = [] - for type, string_, start, _, _ in tokens: - if type == tokenize.OP and string_ == ']': + for type_, string_, _start, _, _ in tokens: + if type_ == tokenize.OP and string_ == ']': return ''.join(filter_parts) else: filter_parts.append(string_) @@ -2225,23 +2244,23 @@ class YoutubeDL: # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' ALLOWED_OPS = ('/', '+', ',', '(', ')') last_string, last_start, last_end, last_line = None, None, None, None - for type, string_, start, end, line in tokens: - if type == tokenize.OP and string_ == '[': + for type_, string_, start, end, line in tokens: + if type_ == tokenize.OP and string_ == '[': if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None - yield type, string_, start, end, line + yield type_, string_, start, end, line # everything inside brackets will be handled by _parse_filter - for type, string_, start, end, line in tokens: - yield type, string_, start, end, line - if type == tokenize.OP and string_ == ']': + for type_, string_, start, end, line in tokens: + yield type_, string_, start, end, line + if type_ == tokenize.OP and string_ == ']': break - elif type == tokenize.OP and string_ in ALLOWED_OPS: + elif type_ == tokenize.OP and string_ in ALLOWED_OPS: if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None - yield type, string_, start, end, line - elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: + yield type_, string_, start, end, line + elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: if not last_string: last_string = string_ last_start = start @@ -2254,13 +2273,13 @@ class YoutubeDL: def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): selectors 
= [] current_selector = None - for type, string_, start, _, _ in tokens: + for type_, string_, start, _, _ in tokens: # ENCODING is only defined in Python 3.x - if type == getattr(tokenize, 'ENCODING', None): + if type_ == getattr(tokenize, 'ENCODING', None): continue - elif type in [tokenize.NAME, tokenize.NUMBER]: + elif type_ in [tokenize.NAME, tokenize.NUMBER]: current_selector = FormatSelector(SINGLE, string_, []) - elif type == tokenize.OP: + elif type_ == tokenize.OP: if string_ == ')': if not inside_group: # ')' will be handled by the parentheses group @@ -2303,7 +2322,7 @@ class YoutubeDL: current_selector = FormatSelector(MERGE, (selector_1, selector_2), []) else: raise syntax_error(f'Operator not recognized: "{string_}"', start) - elif type == tokenize.ENDMARKER: + elif type_ == tokenize.ENDMARKER: break if current_selector: selectors.append(current_selector) @@ -2378,7 +2397,7 @@ class YoutubeDL: 'acodec': the_only_audio.get('acodec'), 'abr': the_only_audio.get('abr'), 'asr': the_only_audio.get('asr'), - 'audio_channels': the_only_audio.get('audio_channels') + 'audio_channels': the_only_audio.get('audio_channels'), }) return new_dict @@ -2459,9 +2478,9 @@ class YoutubeDL: format_fallback = not format_type and not format_modified # for b, w _filter_f = ( - (lambda f: f.get('%scodec' % format_type) != 'none') + (lambda f: f.get(f'{format_type}codec') != 'none') if format_type and format_modified # bv*, ba*, wv*, wa* - else (lambda f: f.get('%scodec' % not_format_type) == 'none') + else (lambda f: f.get(f'{not_format_type}codec') == 'none') if format_type # bv, ba, wv, wa else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none') if not format_modified # b, w @@ -2529,7 +2548,7 @@ class YoutubeDL: def __next__(self): if self.counter >= len(self.tokens): - raise StopIteration() + raise StopIteration value = self.tokens[self.counter] self.counter += 1 return value @@ -2612,7 +2631,7 @@ class YoutubeDL: self._sort_thumbnails(thumbnails) for i, t 
in enumerate(thumbnails): if t.get('id') is None: - t['id'] = '%d' % i + t['id'] = str(i) if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) t['url'] = sanitize_url(t['url']) @@ -2673,8 +2692,8 @@ class YoutubeDL: # Auto generate title fields corresponding to the *_number fields when missing # in order to always have clean titles. This is very common for TV series. for field in ('chapter', 'season', 'episode'): - if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field): - info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number']) for old_key, new_key in self._deprecated_multivalue_fields.items(): if new_key in info_dict and old_key in info_dict: @@ -2706,8 +2725,8 @@ class YoutubeDL: def report_force_conversion(field, field_not, conversion): self.report_warning( - '"%s" field is not %s - forcing %s conversion, there is an error in extractor' - % (field, field_not, conversion)) + f'"{field}" field is not {field_not} - forcing {conversion} conversion, ' + 'there is an error in extractor') def sanitize_string_field(info, string_field): field = info.get(string_field) @@ -2824,28 +2843,28 @@ class YoutubeDL: if not formats: self.raise_no_formats(info_dict) - for format in formats: - sanitize_string_field(format, 'format_id') - sanitize_numeric_fields(format) - format['url'] = sanitize_url(format['url']) - if format.get('ext') is None: - format['ext'] = determine_ext(format['url']).lower() - if format['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): - if format.get('acodec') is None: - format['acodec'] = format['ext'] - if format.get('protocol') is None: - format['protocol'] = determine_protocol(format) - if format.get('resolution') is None: - format['resolution'] = self.format_resolution(format, 
default=None) - if format.get('dynamic_range') is None and format.get('vcodec') != 'none': - format['dynamic_range'] = 'SDR' - if format.get('aspect_ratio') is None: - format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2)) + for fmt in formats: + sanitize_string_field(fmt, 'format_id') + sanitize_numeric_fields(fmt) + fmt['url'] = sanitize_url(fmt['url']) + if fmt.get('ext') is None: + fmt['ext'] = determine_ext(fmt['url']).lower() + if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): + if fmt.get('acodec') is None: + fmt['acodec'] = fmt['ext'] + if fmt.get('protocol') is None: + fmt['protocol'] = determine_protocol(fmt) + if fmt.get('resolution') is None: + fmt['resolution'] = self.format_resolution(fmt, default=None) + if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none': + fmt['dynamic_range'] = 'SDR' + if fmt.get('aspect_ratio') is None: + fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2)) # For fragmented formats, "tbr" is often max bitrate and not average - if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url')) - and not format.get('filesize') and not format.get('filesize_approx')): - format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration')) - format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True) + if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url')) + and not fmt.get('filesize') and not fmt.get('filesize_approx')): + fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration')) + fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True) # Safeguard against old/insecure infojson when using --load-info-json if info_dict.get('http_headers'): @@ -2858,36 +2877,36 @@ class YoutubeDL: self.sort_formats({ 'formats': formats, - '_format_sort_fields': 
info_dict.get('_format_sort_fields') + '_format_sort_fields': info_dict.get('_format_sort_fields'), }) # Sanitize and group by format_id formats_dict = {} - for i, format in enumerate(formats): - if not format.get('format_id'): - format['format_id'] = str(i) + for i, fmt in enumerate(formats): + if not fmt.get('format_id'): + fmt['format_id'] = str(i) else: # Sanitize format_id from characters used in format selector expression - format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) - formats_dict.setdefault(format['format_id'], []).append(format) + fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id']) + formats_dict.setdefault(fmt['format_id'], []).append(fmt) # Make sure all formats have unique format_id common_exts = set(itertools.chain(*self._format_selection_exts.values())) for format_id, ambiguous_formats in formats_dict.items(): ambigious_id = len(ambiguous_formats) > 1 - for i, format in enumerate(ambiguous_formats): + for i, fmt in enumerate(ambiguous_formats): if ambigious_id: - format['format_id'] = '%s-%d' % (format_id, i) + fmt['format_id'] = f'{format_id}-{i}' # Ensure there is no conflict between id and ext in format selection # See https://github.com/yt-dlp/yt-dlp/issues/1282 - if format['format_id'] != format['ext'] and format['format_id'] in common_exts: - format['format_id'] = 'f%s' % format['format_id'] - - if format.get('format') is None: - format['format'] = '{id} - {res}{note}'.format( - id=format['format_id'], - res=self.format_resolution(format), - note=format_field(format, 'format_note', ' (%s)'), + if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts: + fmt['format_id'] = 'f{}'.format(fmt['format_id']) + + if fmt.get('format') is None: + fmt['format'] = '{id} - {res}{note}'.format( + id=fmt['format_id'], + res=self.format_resolution(fmt), + note=format_field(fmt, 'format_note', ' (%s)'), ) if self.params.get('check_formats') is True: @@ -3009,7 +3028,7 @@ class YoutubeDL: 
info_dict['requested_downloads'] = downloaded_formats info_dict = self.run_all_pps('after_video', info_dict) if max_downloads_reached: - raise MaxDownloadsReached() + raise MaxDownloadsReached # We update the info dict with the selected best quality format (backwards compatibility) info_dict.update(best_format) @@ -3070,8 +3089,8 @@ class YoutubeDL: else: f = formats[-1] self.report_warning( - 'No subtitle format found matching "%s" for language %s, ' - 'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext'])) + 'No subtitle format found matching "{}" for language {}, ' + 'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext'])) subs[lang] = f return subs @@ -3189,6 +3208,7 @@ class YoutubeDL: os.remove(file) return None + @_catch_unsafe_extension_error def process_info(self, info_dict): """Process a single resolved IE result. (Modifies it in-place)""" @@ -3226,7 +3246,7 @@ class YoutubeDL: def check_max_downloads(): if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'): - raise MaxDownloadsReached() + raise MaxDownloadsReached if self.params.get('simulate'): info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') @@ -3400,7 +3420,7 @@ class YoutubeDL: for f in info_dict['requested_formats'] if fd != FFmpegFD else []: f['filepath'] = fname = prepend_extension( correct_ext(temp_filename, info_dict['ext']), - 'f%s' % f['format_id'], info_dict['ext']) + 'f{}'.format(f['format_id']), info_dict['ext']) downloaded.append(fname) info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats']) success, real_download = self.dl(temp_filename, info_dict) @@ -3433,7 +3453,7 @@ class YoutubeDL: if temp_filename != '-': fname = prepend_extension( correct_ext(temp_filename, new_info['ext']), - 'f%s' % f['format_id'], new_info['ext']) + 'f{}'.format(f['format_id']), new_info['ext']) if not self._ensure_dir_exists(fname): return 
f['filepath'] = fname @@ -3465,11 +3485,11 @@ class YoutubeDL: info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) except network_exceptions as err: - self.report_error('unable to download video data: %s' % error_to_compat_str(err)) + self.report_error(f'unable to download video data: {err}') return except OSError as err: raise UnavailableVideoError(err) - except (ContentTooShortError, ) as err: + except ContentTooShortError as err: self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})') return @@ -3536,13 +3556,13 @@ class YoutubeDL: try: replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move)) except PostProcessingError as err: - self.report_error('Postprocessing: %s' % str(err)) + self.report_error(f'Postprocessing: {err}') return try: for ph in self._post_hooks: ph(info_dict['filepath']) except Exception as err: - self.report_error('post hooks: %s' % str(err)) + self.report_error(f'post hooks: {err}') return info_dict['__write_download_archive'] = True @@ -3609,7 +3629,7 @@ class YoutubeDL: @staticmethod def sanitize_info(info_dict, remove_private_keys=False): - ''' Sanitize the infodict for converting to json ''' + """ Sanitize the infodict for converting to json """ if info_dict is None: return info_dict info_dict.setdefault('epoch', int(time.time())) @@ -3644,7 +3664,7 @@ class YoutubeDL: @staticmethod def filter_requested_info(info_dict, actually_filter=True): - ''' Alias of sanitize_info for backward compatibility ''' + """ Alias of sanitize_info for backward compatibility """ return YoutubeDL.sanitize_info(info_dict, actually_filter) def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None): @@ -3666,7 +3686,7 @@ class YoutubeDL: actual_post_extract(video_dict or {}) return - post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {}) + post_extractor = info_dict.pop('__post_extractor', None) or dict 
info_dict.update(post_extractor()) actual_post_extract(info_dict or {}) @@ -3771,7 +3791,7 @@ class YoutubeDL: if format.get('width') and format.get('height'): return '%dx%d' % (format['width'], format['height']) elif format.get('height'): - return '%sp' % format['height'] + return '{}p'.format(format['height']) elif format.get('width'): return '%dx?' % format['width'] return default @@ -3788,7 +3808,7 @@ class YoutubeDL: if fdict.get('language'): if res: res += ' ' - res += '[%s]' % fdict['language'] + res += '[{}]'.format(fdict['language']) if fdict.get('format_note') is not None: if res: res += ' ' @@ -3800,7 +3820,7 @@ class YoutubeDL: if fdict.get('container') is not None: if res: res += ', ' - res += '%s container' % fdict['container'] + res += '{} container'.format(fdict['container']) if (fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none'): if res: @@ -3815,7 +3835,7 @@ class YoutubeDL: if fdict.get('fps') is not None: if res: res += ', ' - res += '%sfps' % fdict['fps'] + res += '{}fps'.format(fdict['fps']) if fdict.get('acodec') is not None: if res: res += ', ' @@ -3858,7 +3878,7 @@ class YoutubeDL: format_field(f, 'format_id'), format_field(f, 'ext'), self.format_resolution(f), - self._format_note(f) + self._format_note(f), ] for f in formats if (f.get('preference') or 0) >= -1000] return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) @@ -3964,11 +3984,11 @@ class YoutubeDL: from .extractor.extractors import _LAZY_LOADER from .extractor.extractors import ( _PLUGIN_CLASSES as plugin_ies, - _PLUGIN_OVERRIDES as plugin_ie_overrides + _PLUGIN_OVERRIDES as plugin_ie_overrides, ) def get_encoding(stream): - ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) + ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})')) additional_info = [] if os.environ.get('TERM', '').lower() == 'dumb': additional_info.append('dumb') @@ -3979,13 +3999,13 @@ class YoutubeDL: ret = 
f'{ret} ({",".join(additional_info)})' return ret - encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % ( + encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format( locale.getpreferredencoding(), sys.getfilesystemencoding(), self.get_encoding(), ', '.join( f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_ - if stream is not None and key != 'console') + if stream is not None and key != 'console'), ) logger = self.params.get('logger') @@ -4017,7 +4037,7 @@ class YoutubeDL: else: write_debug('Lazy loading extractors is disabled') if self.params['compat_opts']: - write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) + write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts']))) if current_git_head(): write_debug(f'Git HEAD: {current_git_head()}') @@ -4026,14 +4046,14 @@ class YoutubeDL: exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} if ffmpeg_features: - exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features)) + exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features))) exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v ) or 'none' - write_debug('exe versions: %s' % exe_str) + write_debug(f'exe versions: {exe_str}') from .compat.compat_utils import get_package_info from .dependencies import available_dependencies @@ -4045,7 +4065,7 @@ class YoutubeDL: write_debug(f'Proxy map: {self.proxies}') write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}') for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): - display_list = ['%s%s' % ( + display_list = ['{}{}'.format( klass.__name__, '' if klass.__name__ == name else f' as 
{name}') for name, klass in plugins.items()] if plugin_type == 'Extractor': @@ -4062,14 +4082,13 @@ class YoutubeDL: # Not implemented if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() - write_debug('Public IP address: %s' % ipaddr) + write_debug(f'Public IP address: {ipaddr}') latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode() if version_tuple(latest_version) > version_tuple(__version__): self.report_warning( - 'You are using an outdated version (newest version: %s)! ' - 'See https://yt-dl.org/update if you need help updating.' % - latest_version) + f'You are using an outdated version (newest version: {latest_version})! ' + 'See https://yt-dl.org/update if you need help updating.') @functools.cached_property def proxies(self): @@ -4103,7 +4122,7 @@ class YoutubeDL: return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies) def _get_available_impersonate_targets(self): - # todo(future): make available as public API + # TODO(future): make available as public API return [ (target, rh.RH_NAME) for rh in self._request_director.handlers.values() @@ -4112,7 +4131,7 @@ class YoutubeDL: ] def _impersonate_target_available(self, target): - # todo(future): make available as public API + # TODO(future): make available as public API return any( rh.is_supported_target(target) for rh in self._request_director.handlers.values() @@ -4238,7 +4257,7 @@ class YoutubeDL: return encoding def _write_info_json(self, label, ie_result, infofn, overwrite=None): - ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error ''' + """ Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error """ if overwrite is None: overwrite = self.params.get('overwrites', True) if not self.params.get('writeinfojson'): @@ -4261,7 +4280,7 @@ class YoutubeDL: return None def _write_description(self, label, ie_result, descfn): - 
''' Write description and returns True = written, False = skip, None = error ''' + """ Write description and returns True = written, False = skip, None = error """ if not self.params.get('writedescription'): return False elif not descfn: @@ -4285,7 +4304,7 @@ class YoutubeDL: return True def _write_subtitles(self, info_dict, filename): - ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' + """ Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error""" ret = [] subtitles = info_dict.get('requested_subtitles') if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): @@ -4331,7 +4350,7 @@ class YoutubeDL: self.dl(sub_filename, sub_copy, subtitle=True) sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) - except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err: msg = f'Unable to download video subtitles for {sub_lang!r}: {err}' if self.params.get('ignoreerrors') is not True: # False or 'only_download' if not self.params.get('ignoreerrors'): @@ -4341,7 +4360,7 @@ class YoutubeDL: return ret def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): - ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error ''' + """ Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """ write_all = self.params.get('write_all_thumbnails', False) thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): @@ -4368,8 +4387,8 @@ class YoutubeDL: existing_thumb = self.existing_file((thumb_filename_final, thumb_filename)) if existing_thumb: - self.to_screen('[info] %s is already present' % ( - thumb_display_id if multiple else f'{label} thumbnail').capitalize()) + 
self.to_screen('[info] {} is already present'.format(( + thumb_display_id if multiple else f'{label} thumbnail').capitalize())) t['filepath'] = existing_thumb ret.append((existing_thumb, thumb_filename_final)) else: diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 3d606bc..f88f15d 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -14,7 +14,7 @@ import os import re import traceback -from .compat import compat_os_name, compat_shlex_quote +from .compat import compat_os_name from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .downloader.external import get_external_downloader from .extractor import list_extractor_classes @@ -58,11 +58,13 @@ from .utils import ( read_stdin, render_table, setproctitle, + shell_quote, traverse_obj, variadic, write_string, ) from .utils.networking import std_headers +from .utils._utils import _UnsafeExtensionError from .YoutubeDL import YoutubeDL _IN_CLI = False @@ -115,9 +117,9 @@ def print_extractor_information(opts, urls): ie.description(markdown=False, search_examples=_SEARCHES) for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False) elif opts.ap_list_mso: - out = 'Supported TV Providers:\n%s\n' % render_table( + out = 'Supported TV Providers:\n{}\n'.format(render_table( ['mso', 'mso name'], - [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]) + [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])) else: return False write_string(out, out=sys.stdout) @@ -129,7 +131,7 @@ def set_compat_opts(opts): if name not in opts.compat_opts: return False opts.compat_opts.discard(name) - opts.compat_opts.update(['*%s' % name]) + opts.compat_opts.update([f'*{name}']) return True def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): @@ -222,7 +224,7 @@ def validate_options(opts): validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval') if opts.wait_for_video is not None: - min_wait, max_wait, 
*_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) + min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None]) validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video), 'time range to wait for video', opts.wait_for_video) validate_minmax(min_wait, max_wait, 'time range to wait for video') @@ -264,9 +266,9 @@ def validate_options(opts): # Retry sleep function def parse_sleep_func(expr): NUMBER_RE = r'\d+(?:\.\d+)?' - op, start, limit, step, *_ = tuple(re.fullmatch( + op, start, limit, step, *_ = (*tuple(re.fullmatch( rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?', - expr.strip()).groups()) + (None, None) + expr.strip()).groups()), None, None) if op == 'exp': return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf')) @@ -396,13 +398,13 @@ def validate_options(opts): # MetadataParser def metadataparser_actions(f): if isinstance(f, str): - cmd = '--parse-metadata %s' % compat_shlex_quote(f) + cmd = f'--parse-metadata {shell_quote(f)}' try: actions = [MetadataFromFieldPP.to_action(f)] except Exception as err: raise ValueError(f'{cmd} is invalid; {err}') else: - cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f)) + cmd = f'--replace-in-metadata {shell_quote(f)}' actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(',')) for action in actions: @@ -413,7 +415,7 @@ def validate_options(opts): yield action if opts.metafromtitle is not None: - opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}') opts.parse_metadata = { k: list(itertools.chain(*map(metadataparser_actions, v))) for k, v in opts.parse_metadata.items() @@ -592,6 +594,13 @@ def validate_options(opts): if opts.ap_username is not None and opts.ap_password is None: opts.ap_password = getpass.getpass('Type TV provider 
account password and press [Return]: ') + # compat option changes global state destructively; only allow from cli + if 'allow-unsafe-ext' in opts.compat_opts: + warnings.append( + 'Using allow-unsafe-ext opens you up to potential attacks. ' + 'Use with great care!') + _UnsafeExtensionError.sanitize_extension = lambda x: x + return warnings, deprecation_warnings @@ -602,7 +611,7 @@ def get_postprocessors(opts): yield { 'key': 'MetadataParser', 'actions': actions, - 'when': when + 'when': when, } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: @@ -610,19 +619,19 @@ def get_postprocessors(opts): 'key': 'SponsorBlock', 'categories': sponsorblock_query, 'api': opts.sponsorblock_api, - 'when': 'after_filter' + 'when': 'after_filter', } if opts.convertsubtitles: yield { 'key': 'FFmpegSubtitlesConvertor', 'format': opts.convertsubtitles, - 'when': 'before_dl' + 'when': 'before_dl', } if opts.convertthumbnails: yield { 'key': 'FFmpegThumbnailsConvertor', 'format': opts.convertthumbnails, - 'when': 'before_dl' + 'when': 'before_dl', } if opts.extractaudio: yield { @@ -647,7 +656,7 @@ def get_postprocessors(opts): yield { 'key': 'FFmpegEmbedSubtitle', # already_have_subtitle = True prevents the file from being deleted after embedding - 'already_have_subtitle': opts.writesubtitles and keep_subs + 'already_have_subtitle': opts.writesubtitles and keep_subs, } if not opts.writeautomaticsub and keep_subs: opts.writesubtitles = True @@ -660,7 +669,7 @@ def get_postprocessors(opts): 'remove_sponsor_segments': opts.sponsorblock_remove, 'remove_ranges': opts.remove_ranges, 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, - 'force_keyframes': opts.force_keyframes_at_cuts + 'force_keyframes': opts.force_keyframes_at_cuts, } # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and # FFmpegExtractAudioPP as containers before conversion may not support @@ -694,7 +703,7 @@ def get_postprocessors(opts): yield { 'key': 
'EmbedThumbnail', # already_have_thumbnail = True prevents the file from being deleted after embedding - 'already_have_thumbnail': opts.writethumbnail + 'already_have_thumbnail': opts.writethumbnail, } if not opts.writethumbnail: opts.writethumbnail = True @@ -741,7 +750,7 @@ def parse_options(argv=None): print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) any_getting = any(getattr(opts, k) for k in ( 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', - 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' + 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl', )) if opts.quiet is None: opts.quiet = any_getting or opts.print_json or bool(opts.forceprint) @@ -1002,7 +1011,7 @@ def _real_main(argv=None): def make_row(target, handler): return [ join_nonempty(target.client.title(), target.version, delim='-') or '-', - join_nonempty((target.os or "").title(), target.os_version, delim='-') or '-', + join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-', handler, ] diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index b3a383c..abf54a9 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -68,7 +68,7 @@ def pad_block(block, padding_mode): raise NotImplementedError(f'Padding mode {padding_mode} is not implemented') if padding_mode == 'iso7816' and padding_size: - block = block + [0x80] # NB: += mutates list + block = [*block, 0x80] # NB: += mutates list padding_size -= 1 return block + [PADDING_BYTE[padding_mode]] * padding_size @@ -110,9 +110,7 @@ def aes_ecb_decrypt(data, key, iv=None): for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] encrypted_data += aes_decrypt(block, expanded_key) - encrypted_data = encrypted_data[:len(data)] - - return encrypted_data + return encrypted_data[:len(data)] def aes_ctr_decrypt(data, key, iv): @@ -148,9 +146,7 @@ def aes_ctr_encrypt(data, key, iv): cipher_counter_block = aes_encrypt(counter_block, 
expanded_key) encrypted_data += xor(block, cipher_counter_block) - encrypted_data = encrypted_data[:len(data)] - - return encrypted_data + return encrypted_data[:len(data)] def aes_cbc_decrypt(data, key, iv): @@ -174,9 +170,7 @@ def aes_cbc_decrypt(data, key, iv): decrypted_block = aes_decrypt(block, expanded_key) decrypted_data += xor(decrypted_block, previous_cipher_block) previous_cipher_block = block - decrypted_data = decrypted_data[:len(data)] - - return decrypted_data + return decrypted_data[:len(data)] def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'): @@ -224,7 +218,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key)) if len(nonce) == 12: - j0 = nonce + [0, 0, 0, 1] + j0 = [*nonce, 0, 0, 0, 1] else: fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) @@ -242,11 +236,11 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): data + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad + bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data - + ((len(data) * 8).to_bytes(8, 'big'))) # length of data + + ((len(data) * 8).to_bytes(8, 'big'))), # length of data ) if tag != aes_ctr_encrypt(s_tag, key, j0): - raise ValueError("Mismatching authentication tag") + raise ValueError('Mismatching authentication tag') return decrypted_data @@ -288,9 +282,7 @@ def aes_decrypt(data, expanded_key): data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV)) data = shift_rows_inv(data) data = sub_bytes_inv(data) - data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) - - return data + return xor(data, expanded_key[:BLOCK_SIZE_BYTES]) def aes_decrypt_text(data, password, key_size_bytes): @@ -318,9 +310,7 @@ def aes_decrypt_text(data, password, key_size_bytes): cipher = data[NONCE_LENGTH_BYTES:] decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * 
(BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) - plaintext = intlist_to_bytes(decrypted_data) - - return plaintext + return intlist_to_bytes(decrypted_data) RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) @@ -428,9 +418,7 @@ def key_expansion(data): for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): temp = data[-4:] data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - data = data[:expanded_key_size_bytes] - - return data + return data[:expanded_key_size_bytes] def iter_vector(iv): @@ -511,7 +499,7 @@ def block_product(block_x, block_y): # NIST SP 800-38D, Algorithm 1 if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES: - raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES) + raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes') block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1) block_v = block_y[:] @@ -534,7 +522,7 @@ def ghash(subkey, data): # NIST SP 800-38D, Algorithm 2 if len(data) % BLOCK_SIZE_BYTES: - raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES) + raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes') last_y = [0] * BLOCK_SIZE_BYTES for i in range(0, len(data), BLOCK_SIZE_BYTES): diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index 9dd4f2f..71dca82 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -81,10 +81,10 @@ class Cache: cachedir = self._get_root_dir() if not any((term in cachedir) for term in ('cache', 'tmp')): - raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) + raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir') self._ydl.to_screen( - 'Removing cache dir %s .' 
% cachedir, skip_eol=True) + f'Removing cache dir {cachedir} .', skip_eol=True) if os.path.exists(cachedir): self._ydl.to_screen('.', skip_eol=True) shutil.rmtree(cachedir) diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index 7ea5d08..dfc792e 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -35,7 +35,7 @@ from .compat_utils import passthrough_module from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 -from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401 +from ..networking.exceptions import HTTPError as compat_HTTPError passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) diff --git a/yt_dlp/compat/functools.py b/yt_dlp/compat/functools.py index 36c9836..9668957 100644 --- a/yt_dlp/compat/functools.py +++ b/yt_dlp/compat/functools.py @@ -7,6 +7,6 @@ passthrough_module(__name__, 'functools') del passthrough_module try: - cache # >= 3.9 + _ = cache # >= 3.9 except NameError: cache = lru_cache(maxsize=None) diff --git a/yt_dlp/compat/imghdr.py b/yt_dlp/compat/imghdr.py index 5d64ab0..4ae173f 100644 --- a/yt_dlp/compat/imghdr.py +++ b/yt_dlp/compat/imghdr.py @@ -1,16 +1,22 @@ -tests = { - 'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP', - 'png': lambda h: h[:8] == b'\211PNG\r\n\032\n', - 'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'), - 'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'), -} - - def what(file=None, h=None): """Detect format of image (Currently supports jpeg, png, webp, gif only) - Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py + Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py + Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf """ if h is None: with open(file, 'rb') as f: h = f.read(12) - return next((type_ for type_, test in tests.items() if test(h)), 
None) + + if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8): + return 'webp' + + if h.startswith(b'\x89PNG'): + return 'png' + + if h.startswith(b'\xFF\xD8\xFF'): + return 'jpeg' + + if h.startswith(b'GIF'): + return 'gif' + + return None diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 815897d..070d2fc 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -2,7 +2,9 @@ import base64 import collections import contextlib import datetime as dt +import functools import glob +import hashlib import http.cookiejar import http.cookies import io @@ -17,14 +19,12 @@ import tempfile import time import urllib.request from enum import Enum, auto -from hashlib import pbkdf2_hmac from .aes import ( aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes, unpad_pkcs7, ) -from .compat import functools # isort: split from .compat import compat_os_name from .dependencies import ( _SECRETSTORAGE_UNAVAILABLE_REASON, @@ -146,7 +146,7 @@ def _extract_firefox_cookies(profile, container, logger): identities = json.load(containers).get('identities', []) container_id = next((context.get('userContextId') for context in identities if container in ( context.get('name'), - try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()) + try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()), )), None) if not isinstance(container_id, int): raise ValueError(f'could not find firefox container "{container}" in containers.json') @@ -263,7 +263,7 @@ def _get_chromium_based_browser_settings(browser_name): return { 'browser_dir': browser_dir, 'keyring_name': keyring_name, - 'supports_profiles': browser_name not in browsers_without_profiles + 'supports_profiles': browser_name not in browsers_without_profiles, } @@ -740,40 +740,38 @@ def _get_linux_desktop_environment(env, logger): xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) desktop_session = env.get('DESKTOP_SESSION', None) if xdg_current_desktop is not None: - 
xdg_current_desktop = xdg_current_desktop.split(':')[0].strip() - - if xdg_current_desktop == 'Unity': - if desktop_session is not None and 'gnome-fallback' in desktop_session: + for part in map(str.strip, xdg_current_desktop.split(':')): + if part == 'Unity': + if desktop_session is not None and 'gnome-fallback' in desktop_session: + return _LinuxDesktopEnvironment.GNOME + else: + return _LinuxDesktopEnvironment.UNITY + elif part == 'Deepin': + return _LinuxDesktopEnvironment.DEEPIN + elif part == 'GNOME': return _LinuxDesktopEnvironment.GNOME - else: - return _LinuxDesktopEnvironment.UNITY - elif xdg_current_desktop == 'Deepin': - return _LinuxDesktopEnvironment.DEEPIN - elif xdg_current_desktop == 'GNOME': - return _LinuxDesktopEnvironment.GNOME - elif xdg_current_desktop == 'X-Cinnamon': - return _LinuxDesktopEnvironment.CINNAMON - elif xdg_current_desktop == 'KDE': - kde_version = env.get('KDE_SESSION_VERSION', None) - if kde_version == '5': - return _LinuxDesktopEnvironment.KDE5 - elif kde_version == '6': - return _LinuxDesktopEnvironment.KDE6 - elif kde_version == '4': - return _LinuxDesktopEnvironment.KDE4 - else: - logger.info(f'unknown KDE version: "{kde_version}". 
Assuming KDE4') - return _LinuxDesktopEnvironment.KDE4 - elif xdg_current_desktop == 'Pantheon': - return _LinuxDesktopEnvironment.PANTHEON - elif xdg_current_desktop == 'XFCE': - return _LinuxDesktopEnvironment.XFCE - elif xdg_current_desktop == 'UKUI': - return _LinuxDesktopEnvironment.UKUI - elif xdg_current_desktop == 'LXQt': - return _LinuxDesktopEnvironment.LXQT - else: - logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') + elif part == 'X-Cinnamon': + return _LinuxDesktopEnvironment.CINNAMON + elif part == 'KDE': + kde_version = env.get('KDE_SESSION_VERSION', None) + if kde_version == '5': + return _LinuxDesktopEnvironment.KDE5 + elif kde_version == '6': + return _LinuxDesktopEnvironment.KDE6 + elif kde_version == '4': + return _LinuxDesktopEnvironment.KDE4 + else: + logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4') + return _LinuxDesktopEnvironment.KDE4 + elif part == 'Pantheon': + return _LinuxDesktopEnvironment.PANTHEON + elif part == 'XFCE': + return _LinuxDesktopEnvironment.XFCE + elif part == 'UKUI': + return _LinuxDesktopEnvironment.UKUI + elif part == 'LXQt': + return _LinuxDesktopEnvironment.LXQT + logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') elif desktop_session is not None: if desktop_session == 'deepin': @@ -826,7 +824,7 @@ def _choose_linux_keyring(logger): elif desktop_environment == _LinuxDesktopEnvironment.KDE6: linux_keyring = _LinuxKeyring.KWALLET6 elif desktop_environment in ( - _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER + _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER, ): linux_keyring = _LinuxKeyring.BASICTEXT else: @@ -861,7 +859,7 @@ def _get_kwallet_network_wallet(keyring, logger): 'dbus-send', '--session', '--print-reply=literal', f'--dest={service_name}', wallet_path, - 'org.kde.KWallet.networkWallet' + 'org.kde.KWallet.networkWallet', ], 
text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) if returncode: @@ -891,7 +889,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, logger): 'kwallet-query', '--read-password', f'{browser_keyring_name} Safe Storage', '--folder', f'{browser_keyring_name} Keys', - network_wallet + network_wallet, ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) if returncode: @@ -931,9 +929,8 @@ def _get_gnome_keyring_password(browser_keyring_name, logger): for item in col.get_all_items(): if item.get_label() == f'{browser_keyring_name} Safe Storage': return item.get_secret() - else: - logger.error('failed to read from keyring') - return b'' + logger.error('failed to read from keyring') + return b'' def _get_linux_keyring_password(browser_keyring_name, keyring, logger): @@ -1002,7 +999,7 @@ def _get_windows_v10_key(browser_root, logger): def pbkdf2_sha1(password, salt, iterations, key_length): - return pbkdf2_hmac('sha1', password, salt, iterations, key_length) + return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length) def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16): @@ -1053,7 +1050,7 @@ def _decrypt_windows_dpapi(ciphertext, logger): None, # pvReserved: must be NULL None, # pPromptStruct: information about prompts to display 0, # dwFlags - ctypes.byref(blob_out) # pDataOut + ctypes.byref(blob_out), # pDataOut ) if not ret: logger.warning('failed to decrypt with DPAPI', only_once=True) @@ -1129,24 +1126,24 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}') _RESERVED = { - "expires", - "path", - "comment", - "domain", - "max-age", - "secure", - "httponly", - "version", - "samesite", + 'expires', + 'path', + 'comment', + 'domain', + 'max-age', + 'secure', + 'httponly', + 'version', + 'samesite', } - _FLAGS = {"secure", "httponly"} + _FLAGS = {'secure', 'httponly'} # Added 'bad' group to catch the remaining value - _COOKIE_PATTERN = 
re.compile(r""" + _COOKIE_PATTERN = re.compile(r''' \s* # Optional whitespace at start of cookie (?P<key> # Start of group 'key' - [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter + [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter ) # End of group 'key' ( # Optional group: there may not be a value. \s*=\s* # Equal Sign @@ -1156,7 +1153,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): | # or \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr | # or - [""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string + [''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string ) # End of group 'val' | # or (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values @@ -1164,7 +1161,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): )? # End of optional value group \s* # Any number of spaces. (\s+|;|$) # Ending either at space, semicolon, or EOS. - """, re.ASCII | re.VERBOSE) + ''', re.ASCII | re.VERBOSE) def load(self, data): # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776 @@ -1260,14 +1257,14 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): # with no name, whereas http.cookiejar regards it as a # cookie with no value. 
name, value = '', name - f.write('%s\n' % '\t'.join(( + f.write('{}\n'.format('\t'.join(( cookie.domain, self._true_or_false(cookie.domain.startswith('.')), cookie.path, self._true_or_false(cookie.secure), str_or_none(cookie.expires, default=''), - name, value - ))) + name, value, + )))) def save(self, filename=None, ignore_discard=True, ignore_expires=True): """ @@ -1306,10 +1303,10 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): return line cookie_list = line.split('\t') if len(cookie_list) != self._ENTRY_LEN: - raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) + raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') cookie = self._CookieFileEntry(*cookie_list) if cookie.expires_at and not cookie.expires_at.isdigit(): - raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) + raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') return line cf = io.StringIO() diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 65a0d6f..2e3ea2f 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -404,7 +404,7 @@ class FileDownloader: def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" - self.to_screen('[download] Resuming download at byte %s' % resume_len) + self.to_screen(f'[download] Resuming download at byte {resume_len}') def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True): """Report retry""" diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 8b0b94e..ae23729 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -1,4 +1,5 @@ import enum +import functools import json import os import re @@ -9,7 +10,6 @@ import time import uuid from .fragment import FragmentFD -from ..compat import functools from ..networking import Request from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..utils import ( 
@@ -55,7 +55,7 @@ class ExternalFD(FragmentFD): # correct and expected termination thus all postprocessing # should take place retval = 0 - self.to_screen('[%s] Interrupted by user' % self.get_basename()) + self.to_screen(f'[{self.get_basename()}] Interrupted by user') finally: if self._cookies_tempfile: self.try_remove(self._cookies_tempfile) @@ -108,7 +108,7 @@ class ExternalFD(FragmentFD): return all(( not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, - not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'), + not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'), all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), )) @@ -172,7 +172,7 @@ class ExternalFD(FragmentFD): decrypt_fragment = self.decrypter(info_dict) dest, _ = self.sanitize_open(tmpfilename, 'wb') for frag_index, fragment in enumerate(info_dict['fragments']): - fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) + fragment_filename = f'{tmpfilename}-Frag{frag_index}' try: src, _ = self.sanitize_open(fragment_filename, 'rb') except OSError as err: @@ -186,7 +186,7 @@ class ExternalFD(FragmentFD): if not self.params.get('keep_fragments', False): self.try_remove(encodeFilename(fragment_filename)) dest.close() - self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) + self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls')) return 0 def _call_process(self, cmd, info_dict): @@ -336,11 +336,11 @@ class Aria2cFD(ExternalFD): if 'fragments' in info_dict: cmd += ['--uri-selector=inorder'] - url_list_file = '%s.frag.urls' % tmpfilename + url_list_file = f'{tmpfilename}.frag.urls' url_list = [] for frag_index, fragment in enumerate(info_dict['fragments']): - fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) - url_list.append('%s\n\tout=%s' % 
(fragment['url'], self._aria2c_filename(fragment_filename))) + fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}' + url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename))) stream, _ = self.sanitize_open(url_list_file, 'wb') stream.write('\n'.join(url_list).encode()) stream.close() @@ -357,7 +357,7 @@ class Aria2cFD(ExternalFD): 'id': sanitycheck, 'method': method, 'params': [f'token:{rpc_secret}', *params], - }).encode('utf-8') + }).encode() request = Request( f'http://localhost:{rpc_port}/jsonrpc', data=d, headers={ @@ -416,7 +416,7 @@ class Aria2cFD(ExternalFD): 'total_bytes_estimate': total, 'eta': (total - downloaded) / (speed or 1), 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, - 'elapsed': time.time() - started + 'elapsed': time.time() - started, }) self._hook_progress(status, info_dict) @@ -509,12 +509,12 @@ class FFmpegFD(ExternalFD): proxy = self.params.get('proxy') if proxy: if not re.match(r'^[\da-zA-Z]+://', proxy): - proxy = 'http://%s' % proxy + proxy = f'http://{proxy}' if proxy.startswith('socks'): self.report_warning( - '%s does not support SOCKS proxies. Downloading is likely to fail. ' - 'Consider adding --hls-prefer-native to your command.' % self.get_basename()) + f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. 
' + 'Consider adding --hls-prefer-native to your command.') # Since December 2015 ffmpeg supports -http_proxy option (see # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) @@ -575,7 +575,7 @@ class FFmpegFD(ExternalFD): if end_time: args += ['-t', str(end_time - start_time)] - args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']] + args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py index 28cbba0..22d0ebd 100644 --- a/yt_dlp/downloader/f4m.py +++ b/yt_dlp/downloader/f4m.py @@ -67,12 +67,12 @@ class FlvReader(io.BytesIO): self.read_bytes(3) quality_entry_count = self.read_unsigned_char() # QualityEntryCount - for i in range(quality_entry_count): + for _ in range(quality_entry_count): self.read_string() segment_run_count = self.read_unsigned_int() segments = [] - for i in range(segment_run_count): + for _ in range(segment_run_count): first_segment = self.read_unsigned_int() fragments_per_segment = self.read_unsigned_int() segments.append((first_segment, fragments_per_segment)) @@ -91,12 +91,12 @@ class FlvReader(io.BytesIO): quality_entry_count = self.read_unsigned_char() # QualitySegmentUrlModifiers - for i in range(quality_entry_count): + for _ in range(quality_entry_count): self.read_string() fragments_count = self.read_unsigned_int() fragments = [] - for i in range(fragments_count): + for _ in range(fragments_count): first = self.read_unsigned_int() first_ts = self.read_unsigned_long_long() duration = self.read_unsigned_int() @@ -135,11 +135,11 @@ class FlvReader(io.BytesIO): self.read_string() # MovieIdentifier server_count = self.read_unsigned_char() # ServerEntryTable - for i in range(server_count): + for _ in range(server_count): self.read_string() quality_count = self.read_unsigned_char() # 
QualityEntryTable - for i in range(quality_count): + for _ in range(quality_count): self.read_string() # DrmData self.read_string() @@ -148,14 +148,14 @@ class FlvReader(io.BytesIO): segments_count = self.read_unsigned_char() segments = [] - for i in range(segments_count): + for _ in range(segments_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'asrt' segment = FlvReader(box_data).read_asrt() segments.append(segment) fragments_run_count = self.read_unsigned_char() fragments = [] - for i in range(fragments_run_count): + for _ in range(fragments_run_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'afrt' fragments.append(FlvReader(box_data).read_afrt()) @@ -309,7 +309,7 @@ class F4mFD(FragmentFD): def real_download(self, filename, info_dict): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') - self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url @@ -326,8 +326,8 @@ class F4mFD(FragmentFD): formats = sorted(formats, key=lambda f: f[0]) rate, media = formats[-1] else: - rate, media = list(filter( - lambda f: int(f[0]) == requested_bitrate, formats))[0] + rate, media = next(filter( + lambda f: int(f[0]) == requested_bitrate, formats)) # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. man_base_url = get_base_url(doc) or man_url diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index b4f003d..0d00196 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -199,7 +199,7 @@ class FragmentFD(FileDownloader): '.ytdl file is corrupt' if is_corrupt else 'Inconsistent state of incomplete fragment download') self.report_warning( - '%s. Restarting from the beginning ...' % message) + f'{message}. 
Restarting from the beginning ...') ctx['fragment_index'] = resume_len = 0 if 'ytdl_corrupt' in ctx: del ctx['ytdl_corrupt'] @@ -366,10 +366,10 @@ class FragmentFD(FileDownloader): return decrypt_fragment def download_and_append_fragments_multiple(self, *args, **kwargs): - ''' + """ @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... all args must be either tuple or list - ''' + """ interrupt_trigger = [True] max_progress = len(args) if max_progress == 1: @@ -424,7 +424,7 @@ class FragmentFD(FileDownloader): finally: tpe.shutdown(wait=True) if not interrupt_trigger[0] and not is_live: - raise KeyboardInterrupt() + raise KeyboardInterrupt # we expect the user wants to stop and DO WANT the preceding postprocessors to run; # so returning a intermediate result here instead of KeyboardInterrupt on live return result diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 4ac5d99..0a00d5d 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -72,7 +72,7 @@ class HlsFD(FragmentFD): def real_download(self, filename, info_dict): man_url = info_dict['url'] - self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url @@ -160,10 +160,12 @@ class HlsFD(FragmentFD): extra_state = ctx.setdefault('extra_state', {}) format_index = info_dict.get('format_index') - extra_query = None - extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') - if extra_param_to_segment_url: - extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) + extra_segment_query = None + if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'): + extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url) + extra_key_query = None + if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'): + extra_key_query = 
urllib.parse.parse_qs(extra_param_to_key_url) i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} @@ -190,8 +192,8 @@ class HlsFD(FragmentFD): if frag_index <= ctx['fragment_index']: continue frag_url = urljoin(man_url, line) - if extra_query: - frag_url = update_url_query(frag_url, extra_query) + if extra_segment_query: + frag_url = update_url_query(frag_url, extra_segment_query) fragments.append({ 'frag_index': frag_index, @@ -212,8 +214,8 @@ class HlsFD(FragmentFD): frag_index += 1 map_info = parse_m3u8_attributes(line[11:]) frag_url = urljoin(man_url, map_info.get('URI')) - if extra_query: - frag_url = update_url_query(frag_url, extra_query) + if extra_segment_query: + frag_url = update_url_query(frag_url, extra_segment_query) if map_info.get('BYTERANGE'): splitted_byte_range = map_info.get('BYTERANGE').split('@') @@ -228,7 +230,7 @@ class HlsFD(FragmentFD): 'url': frag_url, 'decrypt_info': decrypt_info, 'byte_range': byte_range, - 'media_sequence': media_sequence + 'media_sequence': media_sequence, }) media_sequence += 1 @@ -244,8 +246,10 @@ class HlsFD(FragmentFD): decrypt_info['KEY'] = external_aes_key else: decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) - if extra_query: - decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) + if extra_key_query or extra_segment_query: + # Fall back to extra_segment_query to key for backwards compat + decrypt_info['URI'] = update_url_query( + decrypt_info['URI'], extra_key_query or extra_segment_query) if decrypt_url != decrypt_info['URI']: decrypt_info['KEY'] = None @@ -350,9 +354,8 @@ class HlsFD(FragmentFD): # XXX: this should probably be silent as well # or verify that all segments contain the same data self.report_warning(bug_reports_message( - 'Discarding a %s block found in the middle of the stream; ' - 'if the subtitles display incorrectly,' - % (type(block).__name__))) + f'Discarding a {type(block).__name__} block found in the middle of the stream; ' + 'if the subtitles 
display incorrectly,')) continue block.write_into(output) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 693828b..c016579 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -176,7 +176,7 @@ class HttpFD(FileDownloader): 'downloaded_bytes': ctx.resume_len, 'total_bytes': ctx.resume_len, }, info_dict) - raise SucceedDownload() + raise SucceedDownload else: # The length does not match, we start the download over self.report_unable_to_resume() @@ -194,7 +194,7 @@ class HttpFD(FileDownloader): def close_stream(): if ctx.stream is not None: - if not ctx.tmpfilename == '-': + if ctx.tmpfilename != '-': ctx.stream.close() ctx.stream = None @@ -268,20 +268,20 @@ class HttpFD(FileDownloader): ctx.filename = self.undo_temp_name(ctx.tmpfilename) self.report_destination(ctx.filename) except OSError as err: - self.report_error('unable to open for writing: %s' % str(err)) + self.report_error(f'unable to open for writing: {err}') return False if self.params.get('xattr_set_filesize', False) and data_len is not None: try: write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) except (XAttrUnavailableError, XAttrMetadataError) as err: - self.report_error('unable to set filesize xattr: %s' % str(err)) + self.report_error(f'unable to set filesize xattr: {err}') try: ctx.stream.write(data_block) except OSError as err: self.to_stderr('\n') - self.report_error('unable to write data: %s' % str(err)) + self.report_error(f'unable to write data: {err}') return False # Apply rate limit @@ -327,7 +327,7 @@ class HttpFD(FileDownloader): elif now - ctx.throttle_start > 3: if ctx.stream is not None and ctx.tmpfilename != '-': ctx.stream.close() - raise ThrottledDownload() + raise ThrottledDownload elif speed: ctx.throttle_start = None @@ -338,7 +338,7 @@ class HttpFD(FileDownloader): if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: ctx.resume_len = byte_counter - raise 
NextFragment() + raise NextFragment if ctx.tmpfilename != '-': ctx.stream.close() diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index dd688f5..62c3a3b 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -251,7 +251,7 @@ class IsmFD(FragmentFD): skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) frag_index = 0 - for i, segment in enumerate(segments): + for segment in segments: frag_index += 1 if frag_index <= ctx['fragment_index']: continue diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index d977dce..3d4f2d7 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -10,7 +10,7 @@ from ..version import __version__ as YT_DLP_VERSION class MhtmlFD(FragmentFD): - _STYLESHEET = """\ + _STYLESHEET = '''\ html, body { margin: 0; padding: 0; @@ -45,7 +45,7 @@ body > figure > img { max-width: 100%; max-height: calc(100vh - 5em); } -""" +''' _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET) _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET) @@ -57,24 +57,19 @@ body > figure > img { )).decode('us-ascii') + '?=' def _gen_cid(self, i, fragment, frag_boundary): - return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary) + return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid' def _gen_stub(self, *, fragments, frag_boundary, title): output = io.StringIO() - output.write(( + output.write( '<!DOCTYPE html>' '<html>' '<head>' - '' '<meta name="generator" content="yt-dlp {version}">' - '' '<title>{title}</title>' - '' '<style>{styles}</style>' - '<body>' - ).format( - version=escapeHTML(YT_DLP_VERSION), - styles=self._STYLESHEET, - title=escapeHTML(title) - )) + f'<meta name="generator" content="yt-dlp {escapeHTML(YT_DLP_VERSION)}">' + f'<title>{escapeHTML(title)}</title>' + f'<style>{self._STYLESHEET}</style>' + '<body>') t0 = 0 for i, frag in enumerate(fragments): @@ -87,15 +82,12 @@ body > figure > img { num=i + 1, 
t0=srt_subtitles_timecode(t0), t1=srt_subtitles_timecode(t1), - duration=formatSeconds(frag['duration'], msec=True) + duration=formatSeconds(frag['duration'], msec=True), )) except (KeyError, ValueError, TypeError): t1 = None - output.write(( - '<figcaption>Slide #{num}</figcaption>' - ).format(num=i + 1)) - output.write('<img src="cid:{cid}">'.format( - cid=self._gen_cid(i, frag, frag_boundary))) + output.write(f'<figcaption>Slide #{i + 1}</figcaption>') + output.write(f'<img src="cid:{self._gen_cid(i, frag, frag_boundary)}">') output.write('</figure>') t0 = t1 @@ -126,31 +118,24 @@ body > figure > img { stub = self._gen_stub( fragments=fragments, frag_boundary=frag_boundary, - title=title + title=title, ) ctx['dest_stream'].write(( 'MIME-Version: 1.0\r\n' 'From: <nowhere@yt-dlp.github.io.invalid>\r\n' 'To: <nowhere@yt-dlp.github.io.invalid>\r\n' - 'Subject: {title}\r\n' + f'Subject: {self._escape_mime(title)}\r\n' 'Content-type: multipart/related; ' - '' 'boundary="{boundary}"; ' - '' 'type="text/html"\r\n' - 'X.yt-dlp.Origin: {origin}\r\n' + f'boundary="{frag_boundary}"; ' + 'type="text/html"\r\n' + f'X.yt-dlp.Origin: {origin}\r\n' '\r\n' - '--{boundary}\r\n' + f'--{frag_boundary}\r\n' 'Content-Type: text/html; charset=utf-8\r\n' - 'Content-Length: {length}\r\n' + f'Content-Length: {len(stub)}\r\n' '\r\n' - '{stub}\r\n' - ).format( - origin=origin, - boundary=frag_boundary, - length=len(stub), - title=self._escape_mime(title), - stub=stub - ).encode()) + f'{stub}\r\n').encode()) extra_state['header_written'] = True for i, fragment in enumerate(fragments): diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index fef8bff..462c6e2 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -15,7 +15,7 @@ class NiconicoDmcFD(FileDownloader): def real_download(self, filename, info_dict): from ..extractor.niconico import NiconicoIE - self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) + 
self.to_screen(f'[{self.FD_NAME}] Downloading from DMC') ie = NiconicoIE(self.ydl) info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict) @@ -34,7 +34,7 @@ class NiconicoDmcFD(FileDownloader): try: self.ydl.urlopen(request).read() except Exception: - self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Heartbeat failed') with heartbeat_lock: if not download_complete: @@ -85,14 +85,14 @@ class NiconicoLiveFD(FileDownloader): 'quality': live_quality, 'protocol': 'hls+fmp4', 'latency': live_latency, - 'chasePlay': False + 'chasePlay': False, }, 'room': { 'protocol': 'webSocket', - 'commentable': True + 'commentable': True, }, 'reconnect': True, - } + }, })) else: ws = ws_extractor @@ -118,7 +118,7 @@ class NiconicoLiveFD(FileDownloader): elif self.ydl.params.get('verbose', False): if len(recv) > 100: recv = recv[:100] + '...' - self.to_screen('[debug] Server said: %s' % recv) + self.to_screen(f'[debug] Server said: {recv}') def ws_main(): reconnect = False @@ -128,7 +128,7 @@ class NiconicoLiveFD(FileDownloader): if ret is True: return except BaseException as e: - self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e))) + self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e))) time.sleep(10) continue finally: diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py index 0e09525..d7ffb3b 100644 --- a/yt_dlp/downloader/rtmp.py +++ b/yt_dlp/downloader/rtmp.py @@ -180,9 +180,9 @@ class RtmpFD(FileDownloader): while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: prevsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize) + self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes') time.sleep(5.0) # This seems to be needed - args = basic_args + ['--resume'] + args = [*basic_args, 
'--resume'] if retval == RD_FAILED: args += ['--skip', '1'] args = [encodeArgument(a) for a in args] @@ -197,7 +197,7 @@ class RtmpFD(FileDownloader): break if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize) + self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ 'downloaded_bytes': fsize, diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index c7a8637..961938d 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -18,7 +18,7 @@ class YoutubeLiveChatFD(FragmentFD): def real_download(self, filename, info_dict): video_id = info_dict['video_id'] - self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading live chat') if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': self.report_warning('Live chat download runs until the livestream ends. 
' 'If you wish to download the video simultaneously, run a separate yt-dlp instance') diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e9cd38a..7f6507d 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -76,6 +76,7 @@ from .aenetworks import ( ) from .aeonco import AeonCoIE from .afreecatv import ( + AfreecaTVCatchStoryIE, AfreecaTVIE, AfreecaTVLiveIE, AfreecaTVUserIE, @@ -779,6 +780,7 @@ from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE +from .graspop import GraspopIE from .gronkh import ( GronkhFeedIE, GronkhIE, @@ -969,6 +971,10 @@ from .la7 import ( LA7PodcastEpisodeIE, LA7PodcastIE, ) +from .laracasts import ( + LaracastsIE, + LaracastsPlaylistIE, +) from .lastfm import ( LastFMIE, LastFMPlaylistIE, @@ -1113,12 +1119,15 @@ from .meipai import MeipaiIE from .melonvod import MelonVODIE from .metacritic import MetacriticIE from .mgtv import MGTVIE -from .microsoftembed import MicrosoftEmbedIE -from .microsoftstream import MicrosoftStreamIE -from .microsoftvirtualacademy import ( - MicrosoftVirtualAcademyCourseIE, - MicrosoftVirtualAcademyIE, +from .microsoftembed import ( + MicrosoftBuildIE, + MicrosoftEmbedIE, + MicrosoftLearnEpisodeIE, + MicrosoftLearnPlaylistIE, + MicrosoftLearnSessionIE, + MicrosoftMediusIE, ) +from .microsoftstream import MicrosoftStreamIE from .mildom import ( MildomClipIE, MildomIE, @@ -1603,6 +1612,7 @@ from .qqmusic import ( QQMusicPlaylistIE, QQMusicSingerIE, QQMusicToplistIE, + QQMusicVideoIE, ) from .r7 import ( R7IE, @@ -1755,7 +1765,10 @@ from .rtve import ( RTVETelevisionIE, ) from .rtvs import RTVSIE -from .rtvslo import RTVSLOIE +from .rtvslo import ( + RTVSLOIE, + RTVSLOShowIE, +) from .rudovideo import RudoVideoIE from .rule34video import Rule34VideoIE from .rumble import ( @@ -1925,6 +1938,10 @@ from .spreaker import ( ) from .springboardplatform import SpringboardPlatformIE 
from .sprout import SproutIE +from .sproutvideo import ( + SproutVideoIE, + VidsIoIE, +) from .srgssr import ( SRGSSRIE, SRGSSRPlayIE, diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 2c0d296..7518ba6 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -4,7 +4,6 @@ import re import time from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, dict_get, @@ -67,7 +66,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'title': 'WWI Centenary', 'description': 'md5:c2379ec0ca84072e86b446e536954546', - } + }, }, { 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', 'info_dict': { @@ -75,7 +74,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', - } + }, }, { 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', 'info_dict': { @@ -86,7 +85,7 @@ class ABCIE(InfoExtractor): 'upload_date': '20200813', 'uploader': 'Behind the News', 'uploader_id': 'behindthenews', - } + }, }, { 'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540', 'info_dict': { @@ -95,7 +94,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.', 'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', - } + }, }] def _real_extract(self, url): @@ -126,7 +125,7 @@ class ABCIE(InfoExtractor): if mobj is None: expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None) if expired: - raise 
ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True) raise ExtractorError('Unable to extract video urls') urls_info = self._parse_json( @@ -164,7 +163,7 @@ class ABCIE(InfoExtractor): 'height': height, 'tbr': bitrate, 'filesize': int_or_none(url_info.get('filesize')), - 'format_id': format_id + 'format_id': format_id, }) return { @@ -288,13 +287,12 @@ class ABCIViewIE(InfoExtractor): stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) house_number = video_params.get('episodeHouseNumber') or video_id - path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( - int(time.time()), house_number) + path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet' sig = hmac.new( b'android.content.res.Resources', - path.encode('utf-8'), hashlib.sha256).hexdigest() + path.encode(), hashlib.sha256).hexdigest() token = self._download_webpage( - 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id) + f'http://iview.abc.net.au{path}&sig={sig}', video_id) def tokenize_url(url, token): return update_url_query(url, { @@ -303,7 +301,7 @@ class ABCIViewIE(InfoExtractor): for sd in ('1080', '720', 'sd', 'sd-low'): sd_url = try_get( - stream, lambda x: x['streams']['hls'][sd], compat_str) + stream, lambda x: x['streams']['hls'][sd], str) if not sd_url: continue formats = self._extract_m3u8_formats( @@ -358,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): 'description': 'md5:93119346c24a7c322d446d8eece430ff', 'series': 'Upper Middle Bogan', 'season': 'Series 1', - 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$' + 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', }, 'playlist_count': 8, }, { @@ -386,7 +384,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): 'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.', 'series': 
'7.30 Mark Humphries Satire', 'season': 'Episodes', - 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$' + 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', }, 'playlist_count': 15, }] @@ -398,7 +396,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;', webpage, 'initial state') video_data = self._parse_json( - unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id) + unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id) video_data = video_data['route']['pageData']['_embedded'] highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl']) diff --git a/yt_dlp/extractor/abcnews.py b/yt_dlp/extractor/abcnews.py index a57295b..7215500 100644 --- a/yt_dlp/extractor/abcnews.py +++ b/yt_dlp/extractor/abcnews.py @@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE): display_id = mobj.group('display_id') video_id = mobj.group('id') info_dict = self._extract_feed_info( - 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id) + f'http://abcnews.go.com/video/itemfeed?id={video_id}') info_dict.update({ 'id': video_id, 'display_id': display_id, diff --git a/yt_dlp/extractor/abcotvs.py b/yt_dlp/extractor/abcotvs.py index 6dca19d..ea5882b 100644 --- a/yt_dlp/extractor/abcotvs.py +++ b/yt_dlp/extractor/abcotvs.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( dict_get, int_or_none, @@ -57,11 +56,11 @@ class ABCOTVSIE(InfoExtractor): data = self._download_json( 'https://api.abcotvs.com/v2/content', display_id, query={ 'id': video_id, - 'key': 'otv.web.%s.story' % station, + 'key': f'otv.web.{station}.story', 'station': station, })['data'] video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data - video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id)) + video_id = str(dict_get(video, ('id', 'publishedKey'), video_id)) title = video.get('title') or 
video['linkText'] formats = [] diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index b8c79b9..293a6c4 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -66,8 +66,8 @@ class AbemaLicenseHandler(urllib.request.BaseHandler): query={'t': media_token}, data=json.dumps({ 'kv': 'a', - 'lt': ticket - }).encode('utf-8'), + 'lt': ticket, + }).encode(), headers={ 'Content-Type': 'application/json', }) @@ -77,7 +77,7 @@ class AbemaLicenseHandler(urllib.request.BaseHandler): h = hmac.new( binascii.unhexlify(self.HKEY), - (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'), + (license_response['cid'] + self.ie._DEVICE_ID).encode(), digestmod=hashlib.sha256) enckey = bytes_to_intlist(h.digest()) @@ -103,11 +103,11 @@ class AbemaTVBaseIE(InfoExtractor): @classmethod def _generate_aks(cls, deviceid): - deviceid = deviceid.encode('utf-8') + deviceid = deviceid.encode() # add 1 hour and then drop minute and secs ts_1hour = int((time_seconds() // 3600 + 1) * 3600) time_struct = time.gmtime(ts_1hour) - ts_1hour_str = str(ts_1hour).encode('utf-8') + ts_1hour_str = str(ts_1hour).encode() tmp = None @@ -119,7 +119,7 @@ class AbemaTVBaseIE(InfoExtractor): def mix_tmp(count): nonlocal tmp - for i in range(count): + for _ in range(count): mix_once(tmp) def mix_twist(nonce): @@ -160,7 +160,7 @@ class AbemaTVBaseIE(InfoExtractor): data=json.dumps({ 'deviceId': self._DEVICE_ID, 'applicationKeySecret': aks, - }).encode('utf-8'), + }).encode(), headers={ 'Content-Type': 'application/json', }) @@ -180,7 +180,7 @@ class AbemaTVBaseIE(InfoExtractor): 'osLang': 'ja_JP', 'osTimezone': 'Asia/Tokyo', 'appId': 'tv.abema', - 'appVersion': '3.27.1' + 'appVersion': '3.27.1', }, headers={ 'Authorization': f'bearer {self._get_device_token()}', })['token'] @@ -202,8 +202,8 @@ class AbemaTVBaseIE(InfoExtractor): f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in', data=json.dumps({ method: username, - 'password': password - 
}).encode('utf-8'), headers={ + 'password': password, + }).encode(), headers={ 'Authorization': f'bearer {self._get_device_token()}', 'Origin': 'https://abema.tv', 'Referer': 'https://abema.tv/', @@ -344,7 +344,7 @@ class AbemaTVIE(AbemaTVBaseIE): description = self._html_search_regex( (r'<p\s+class="com-video-EpisodeDetailsBlock__content"><span\s+class=".+?">(.+?)</span></p><div', - r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div',), + r'<span\s+class=".+?SlotSummary.+?">(.+?)</span></div><div'), webpage, 'description', default=None, group=1) if not description: og_desc = self._html_search_meta( diff --git a/yt_dlp/extractor/acast.py b/yt_dlp/extractor/acast.py index 427d04c..8f4a2cf 100644 --- a/yt_dlp/extractor/acast.py +++ b/yt_dlp/extractor/acast.py @@ -67,7 +67,7 @@ class ACastIE(ACastBaseIE): 'display_id': '2.raggarmordet-rosterurdetforflutna', 'season_number': 4, 'season': 'Season 4', - } + }, }, { 'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015', 'only_matching': True, @@ -93,13 +93,13 @@ class ACastIE(ACastBaseIE): 'series': 'Democracy Sausage with Mark Kenny', 'timestamp': 1684826362, 'description': 'md5:feabe1fc5004c78ee59c84a46bf4ba16', - } + }, }] def _real_extract(self, url): channel, display_id = self._match_valid_url(url).groups() episode = self._call_api( - '%s/episodes/%s' % (channel, display_id), + f'{channel}/episodes/{display_id}', display_id, {'showInfo': 'true'}) return self._extract_episode( episode, self._extract_show_info(episode.get('show') or {})) @@ -130,7 +130,7 @@ class ACastChannelIE(ACastBaseIE): @classmethod def suitable(cls, url): - return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url) + return False if ACastIE.suitable(url) else super().suitable(url) def _real_extract(self, url): show_slug = self._match_id(url) diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py index 0793319..28559ba 100644 --- a/yt_dlp/extractor/acfun.py +++ 
b/yt_dlp/extractor/acfun.py @@ -25,7 +25,7 @@ class AcFunVideoBaseIE(InfoExtractor): 'width': int_or_none(video.get('width')), 'height': int_or_none(video.get('height')), 'tbr': float_or_none(video.get('avgBitrate')), - **parse_codecs(video.get('codecs', '')) + **parse_codecs(video.get('codecs', '')), }) return { @@ -77,7 +77,7 @@ class AcFunVideoIE(AcFunVideoBaseIE): 'comment_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg)', 'description': 'md5:67583aaf3a0f933bd606bc8a2d3ebb17', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 2f3b67d..7be990b 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -7,7 +7,6 @@ import time from .common import InfoExtractor from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 -from ..compat import compat_b64decode from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -111,9 +110,9 @@ class ADNIE(ADNBaseIE): # http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( - compat_b64decode(enc_subtitles[24:]), + base64.b64decode(enc_subtitles[24:]), binascii.unhexlify(self._K + '7fac1178830cfe0c'), - compat_b64decode(enc_subtitles[:24]))) + base64.b64decode(enc_subtitles[:24]))) subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False) if not subtitles_json: return None @@ -136,7 +135,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' if start is None or end is None or text is None: continue alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) - ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % ( + ssa += os.linesep + 'Dialogue: Marked=0,{},{},Default,,0,0,0,,{}{}'.format( ass_subtitles_timecode(start), ass_subtitles_timecode(end), '{\\a%d}' % alignment if alignment != 2 else '', @@ -178,7 +177,7 @@ Format: 
Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' def _real_extract(self, url): lang, video_id = self._match_valid_url(url).group('lang', 'id') - video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id + video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/' player = self._download_json( video_base_url + 'configuration', video_id, 'Downloading player config JSON metadata', @@ -219,12 +218,12 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' links_url, video_id, 'Downloading links JSON metadata', headers={ 'X-Player-Token': authorization, 'X-Target-Distribution': lang, - **self._HEADERS + **self._HEADERS, }, query={ 'freeWithAds': 'true', 'adaptive': 'false', 'withMetadata': 'true', - 'source': 'Web' + 'source': 'Web', }) break except ExtractorError as e: @@ -256,7 +255,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' for quality, load_balancer_url in qualities.items(): load_balancer_data = self._download_json( load_balancer_url, video_id, - 'Downloading %s %s JSON metadata' % (format_id, quality), + f'Downloading {format_id} {quality} JSON metadata', fatal=False) or {} m3u8_url = load_balancer_data.get('location') if not m3u8_url: @@ -276,7 +275,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' self.raise_login_required('This video requires a subscription', method='password') video = (self._download_json( - self._API_BASE_URL + 'video/%s' % video_id, video_id, + self._API_BASE_URL + f'video/{video_id}', video_id, 'Downloading additional video metadata', fatal=False) or {}).get('video') or {} show = video.get('show') or {} @@ -320,7 +319,7 @@ class ADNSeasonIE(ADNBaseIE): f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug, 'Downloading episode list', headers={ 'X-Target-Distribution': lang, - **self._HEADERS + **self._HEADERS, }, query={ 'order': 'asc', 'limit': '-1', diff --git a/yt_dlp/extractor/adobeconnect.py 
b/yt_dlp/extractor/adobeconnect.py index 8963b12..b2934d6 100644 --- a/yt_dlp/extractor/adobeconnect.py +++ b/yt_dlp/extractor/adobeconnect.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) class AdobeConnectIE(InfoExtractor): @@ -12,13 +10,13 @@ class AdobeConnectIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._html_extract_title(webpage) - qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1]) + qs = urllib.parse.parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1]) is_live = qs.get('isLive', ['false'])[0] == 'true' formats = [] for con_string in qs['conStrings'][0].split(','): formats.append({ 'format_id': con_string.split('://')[0], - 'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]), + 'app': urllib.parse.quote('?' 
+ con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]), 'ext': 'flv', 'play_path': 'mp4:' + qs['streamName'][0], 'rtmp_conn': 'S:' + qs['ticket'][0], diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 5eed0ca..eb7e597 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -2,10 +2,10 @@ import getpass import json import re import time +import urllib.parse import xml.etree.ElementTree as etree from .common import InfoExtractor -from ..compat import compat_urlparse from ..networking.exceptions import HTTPError from ..utils import ( NO_DEFAULT, @@ -68,7 +68,7 @@ MSO_INFO = { }, 'Philo': { 'name': 'Philo', - 'username_field': 'ident' + 'username_field': 'ident', }, 'Verizon': { 'name': 'Verizon FiOS', @@ -81,1258 +81,1258 @@ MSO_INFO = { 'password_field': 'j_password', }, 'thr030': { - 'name': '3 Rivers Communications' + 'name': '3 Rivers Communications', }, 'com140': { - 'name': 'Access Montana' + 'name': 'Access Montana', }, 'acecommunications': { - 'name': 'AcenTek' + 'name': 'AcenTek', }, 'acm010': { - 'name': 'Acme Communications' + 'name': 'Acme Communications', }, 'ada020': { - 'name': 'Adams Cable Service' + 'name': 'Adams Cable Service', }, 'alb020': { - 'name': 'Albany Mutual Telephone' + 'name': 'Albany Mutual Telephone', }, 'algona': { - 'name': 'Algona Municipal Utilities' + 'name': 'Algona Municipal Utilities', }, 'allwest': { - 'name': 'All West Communications' + 'name': 'All West Communications', }, 'all025': { - 'name': 'Allen\'s Communications' + 'name': 'Allen\'s Communications', }, 'spl010': { - 'name': 'Alliance Communications' + 'name': 'Alliance Communications', }, 'all070': { - 'name': 'ALLO Communications' + 'name': 'ALLO Communications', }, 'alpine': { - 'name': 'Alpine Communications' + 'name': 'Alpine Communications', }, 'hun015': { - 'name': 'American Broadband' + 'name': 'American Broadband', }, 'nwc010': { - 'name': 'American Broadband Missouri' + 'name': 'American 
Broadband Missouri', }, 'com130-02': { - 'name': 'American Community Networks' + 'name': 'American Community Networks', }, 'com130-01': { - 'name': 'American Warrior Networks' + 'name': 'American Warrior Networks', }, 'tom020': { - 'name': 'Amherst Telephone/Tomorrow Valley' + 'name': 'Amherst Telephone/Tomorrow Valley', }, 'tvc020': { - 'name': 'Andycable' + 'name': 'Andycable', }, 'arkwest': { - 'name': 'Arkwest Communications' + 'name': 'Arkwest Communications', }, 'art030': { - 'name': 'Arthur Mutual Telephone Company' + 'name': 'Arthur Mutual Telephone Company', }, 'arvig': { - 'name': 'Arvig' + 'name': 'Arvig', }, 'nttcash010': { - 'name': 'Ashland Home Net' + 'name': 'Ashland Home Net', }, 'astound': { - 'name': 'Astound (now Wave)' + 'name': 'Astound (now Wave)', }, 'dix030': { - 'name': 'ATC Broadband' + 'name': 'ATC Broadband', }, 'ara010': { - 'name': 'ATC Communications' + 'name': 'ATC Communications', }, 'she030-02': { - 'name': 'Ayersville Communications' + 'name': 'Ayersville Communications', }, 'baldwin': { - 'name': 'Baldwin Lightstream' + 'name': 'Baldwin Lightstream', }, 'bal040': { - 'name': 'Ballard TV' + 'name': 'Ballard TV', }, 'cit025': { - 'name': 'Bardstown Cable TV' + 'name': 'Bardstown Cable TV', }, 'bay030': { - 'name': 'Bay Country Communications' + 'name': 'Bay Country Communications', }, 'tel095': { - 'name': 'Beaver Creek Cooperative Telephone' + 'name': 'Beaver Creek Cooperative Telephone', }, 'bea020': { - 'name': 'Beaver Valley Cable' + 'name': 'Beaver Valley Cable', }, 'bee010': { - 'name': 'Bee Line Cable' + 'name': 'Bee Line Cable', }, 'wir030': { - 'name': 'Beehive Broadband' + 'name': 'Beehive Broadband', }, 'bra020': { - 'name': 'BELD' + 'name': 'BELD', }, 'bel020': { - 'name': 'Bellevue Municipal Cable' + 'name': 'Bellevue Municipal Cable', }, 'vol040-01': { - 'name': 'Ben Lomand Connect / BLTV' + 'name': 'Ben Lomand Connect / BLTV', }, 'bev010': { - 'name': 'BEVCOMM' + 'name': 'BEVCOMM', }, 'big020': { - 'name': 'Big 
Sandy Broadband' + 'name': 'Big Sandy Broadband', }, 'ble020': { - 'name': 'Bledsoe Telephone Cooperative' + 'name': 'Bledsoe Telephone Cooperative', }, 'bvt010': { - 'name': 'Blue Valley Tele-Communications' + 'name': 'Blue Valley Tele-Communications', }, 'bra050': { - 'name': 'Brandenburg Telephone Co.' + 'name': 'Brandenburg Telephone Co.', }, 'bte010': { - 'name': 'Bristol Tennessee Essential Services' + 'name': 'Bristol Tennessee Essential Services', }, 'annearundel': { - 'name': 'Broadstripe' + 'name': 'Broadstripe', }, 'btc010': { - 'name': 'BTC Communications' + 'name': 'BTC Communications', }, 'btc040': { - 'name': 'BTC Vision - Nahunta' + 'name': 'BTC Vision - Nahunta', }, 'bul010': { - 'name': 'Bulloch Telephone Cooperative' + 'name': 'Bulloch Telephone Cooperative', }, 'but010': { - 'name': 'Butler-Bremer Communications' + 'name': 'Butler-Bremer Communications', }, 'tel160-csp': { - 'name': 'C Spire SNAP' + 'name': 'C Spire SNAP', }, 'csicable': { - 'name': 'Cable Services Inc.' 
+ 'name': 'Cable Services Inc.', }, 'cableamerica': { - 'name': 'CableAmerica' + 'name': 'CableAmerica', }, 'cab038': { - 'name': 'CableSouth Media 3' + 'name': 'CableSouth Media 3', }, 'weh010-camtel': { - 'name': 'Cam-Tel Company' + 'name': 'Cam-Tel Company', }, 'car030': { - 'name': 'Cameron Communications' + 'name': 'Cameron Communications', }, 'canbytel': { - 'name': 'Canby Telcom' + 'name': 'Canby Telcom', }, 'crt020': { - 'name': 'CapRock Tv' + 'name': 'CapRock Tv', }, 'car050': { - 'name': 'Carnegie Cable' + 'name': 'Carnegie Cable', }, 'cas': { - 'name': 'CAS Cable' + 'name': 'CAS Cable', }, 'casscomm': { - 'name': 'CASSCOMM' + 'name': 'CASSCOMM', }, 'mid180-02': { - 'name': 'Catalina Broadband Solutions' + 'name': 'Catalina Broadband Solutions', }, 'cccomm': { - 'name': 'CC Communications' + 'name': 'CC Communications', }, 'nttccde010': { - 'name': 'CDE Lightband' + 'name': 'CDE Lightband', }, 'cfunet': { - 'name': 'Cedar Falls Utilities' + 'name': 'Cedar Falls Utilities', }, 'dem010-01': { - 'name': 'Celect-Bloomer Telephone Area' + 'name': 'Celect-Bloomer Telephone Area', }, 'dem010-02': { - 'name': 'Celect-Bruce Telephone Area' + 'name': 'Celect-Bruce Telephone Area', }, 'dem010-03': { - 'name': 'Celect-Citizens Connected Area' + 'name': 'Celect-Citizens Connected Area', }, 'dem010-04': { - 'name': 'Celect-Elmwood/Spring Valley Area' + 'name': 'Celect-Elmwood/Spring Valley Area', }, 'dem010-06': { - 'name': 'Celect-Mosaic Telecom' + 'name': 'Celect-Mosaic Telecom', }, 'dem010-05': { - 'name': 'Celect-West WI Telephone Area' + 'name': 'Celect-West WI Telephone Area', }, 'net010-02': { - 'name': 'Cellcom/Nsight Telservices' + 'name': 'Cellcom/Nsight Telservices', }, 'cen100': { - 'name': 'CentraCom' + 'name': 'CentraCom', }, 'nttccst010': { - 'name': 'Central Scott / CSTV' + 'name': 'Central Scott / CSTV', }, 'cha035': { - 'name': 'Chaparral CableVision' + 'name': 'Chaparral CableVision', }, 'cha050': { - 'name': 'Chariton Valley Communication 
Corporation, Inc.' + 'name': 'Chariton Valley Communication Corporation, Inc.', }, 'cha060': { - 'name': 'Chatmoss Cablevision' + 'name': 'Chatmoss Cablevision', }, 'nttcche010': { - 'name': 'Cherokee Communications' + 'name': 'Cherokee Communications', }, 'che050': { - 'name': 'Chesapeake Bay Communications' + 'name': 'Chesapeake Bay Communications', }, 'cimtel': { - 'name': 'Cim-Tel Cable, LLC.' + 'name': 'Cim-Tel Cable, LLC.', }, 'cit180': { - 'name': 'Citizens Cablevision - Floyd, VA' + 'name': 'Citizens Cablevision - Floyd, VA', }, 'cit210': { - 'name': 'Citizens Cablevision, Inc.' + 'name': 'Citizens Cablevision, Inc.', }, 'cit040': { - 'name': 'Citizens Fiber' + 'name': 'Citizens Fiber', }, 'cit250': { - 'name': 'Citizens Mutual' + 'name': 'Citizens Mutual', }, 'war040': { - 'name': 'Citizens Telephone Corporation' + 'name': 'Citizens Telephone Corporation', }, 'wat025': { - 'name': 'City Of Monroe' + 'name': 'City Of Monroe', }, 'wadsworth': { - 'name': 'CityLink' + 'name': 'CityLink', }, 'nor100': { - 'name': 'CL Tel' + 'name': 'CL Tel', }, 'cla010': { - 'name': 'Clarence Telephone and Cedar Communications' + 'name': 'Clarence Telephone and Cedar Communications', }, 'ser060': { - 'name': 'Clear Choice Communications' + 'name': 'Clear Choice Communications', }, 'tac020': { - 'name': 'Click! Cable TV' + 'name': 'Click! 
Cable TV', }, 'war020': { - 'name': 'CLICK1.NET' + 'name': 'CLICK1.NET', }, 'cml010': { - 'name': 'CML Telephone Cooperative Association' + 'name': 'CML Telephone Cooperative Association', }, 'cns': { - 'name': 'CNS' + 'name': 'CNS', }, 'com160': { - 'name': 'Co-Mo Connect' + 'name': 'Co-Mo Connect', }, 'coa020': { - 'name': 'Coast Communications' + 'name': 'Coast Communications', }, 'coa030': { - 'name': 'Coaxial Cable TV' + 'name': 'Coaxial Cable TV', }, 'mid055': { - 'name': 'Cobalt TV (Mid-State Community TV)' + 'name': 'Cobalt TV (Mid-State Community TV)', }, 'col070': { - 'name': 'Columbia Power & Water Systems' + 'name': 'Columbia Power & Water Systems', }, 'col080': { - 'name': 'Columbus Telephone' + 'name': 'Columbus Telephone', }, 'nor105': { - 'name': 'Communications 1 Cablevision, Inc.' + 'name': 'Communications 1 Cablevision, Inc.', }, 'com150': { - 'name': 'Community Cable & Broadband' + 'name': 'Community Cable & Broadband', }, 'com020': { - 'name': 'Community Communications Company' + 'name': 'Community Communications Company', }, 'coy010': { - 'name': 'commZoom' + 'name': 'commZoom', }, 'com025': { - 'name': 'Complete Communication Services' + 'name': 'Complete Communication Services', }, 'cat020': { - 'name': 'Comporium' + 'name': 'Comporium', }, 'com071': { - 'name': 'ComSouth Telesys' + 'name': 'ComSouth Telesys', }, 'consolidatedcable': { - 'name': 'Consolidated' + 'name': 'Consolidated', }, 'conwaycorp': { - 'name': 'Conway Corporation' + 'name': 'Conway Corporation', }, 'coo050': { - 'name': 'Coon Valley Telecommunications Inc' + 'name': 'Coon Valley Telecommunications Inc', }, 'coo080': { - 'name': 'Cooperative Telephone Company' + 'name': 'Cooperative Telephone Company', }, 'cpt010': { - 'name': 'CP-TEL' + 'name': 'CP-TEL', }, 'cra010': { - 'name': 'Craw-Kan Telephone' + 'name': 'Craw-Kan Telephone', }, 'crestview': { - 'name': 'Crestview Cable Communications' + 'name': 'Crestview Cable Communications', }, 'cross': { - 'name': 'Cross TV' + 
'name': 'Cross TV', }, 'cro030': { - 'name': 'Crosslake Communications' + 'name': 'Crosslake Communications', }, 'ctc040': { - 'name': 'CTC - Brainerd MN' + 'name': 'CTC - Brainerd MN', }, 'phe030': { - 'name': 'CTV-Beam - East Alabama' + 'name': 'CTV-Beam - East Alabama', }, 'cun010': { - 'name': 'Cunningham Telephone & Cable' + 'name': 'Cunningham Telephone & Cable', }, 'dpc010': { - 'name': 'D & P Communications' + 'name': 'D & P Communications', }, 'dak030': { - 'name': 'Dakota Central Telecommunications' + 'name': 'Dakota Central Telecommunications', }, 'nttcdel010': { - 'name': 'Delcambre Telephone LLC' + 'name': 'Delcambre Telephone LLC', }, 'tel160-del': { - 'name': 'Delta Telephone Company' + 'name': 'Delta Telephone Company', }, 'sal040': { - 'name': 'DiamondNet' + 'name': 'DiamondNet', }, 'ind060-dc': { - 'name': 'Direct Communications' + 'name': 'Direct Communications', }, 'doy010': { - 'name': 'Doylestown Cable TV' + 'name': 'Doylestown Cable TV', }, 'dic010': { - 'name': 'DRN' + 'name': 'DRN', }, 'dtc020': { - 'name': 'DTC' + 'name': 'DTC', }, 'dtc010': { - 'name': 'DTC Cable (Delhi)' + 'name': 'DTC Cable (Delhi)', }, 'dum010': { - 'name': 'Dumont Telephone Company' + 'name': 'Dumont Telephone Company', }, 'dun010': { - 'name': 'Dunkerton Telephone Cooperative' + 'name': 'Dunkerton Telephone Cooperative', }, 'cci010': { - 'name': 'Duo County Telecom' + 'name': 'Duo County Telecom', }, 'eagle': { - 'name': 'Eagle Communications' + 'name': 'Eagle Communications', }, 'weh010-east': { - 'name': 'East Arkansas Cable TV' + 'name': 'East Arkansas Cable TV', }, 'eatel': { - 'name': 'EATEL Video, LLC' + 'name': 'EATEL Video, LLC', }, 'ell010': { - 'name': 'ECTA' + 'name': 'ECTA', }, 'emerytelcom': { - 'name': 'Emery Telcom Video LLC' + 'name': 'Emery Telcom Video LLC', }, 'nor200': { - 'name': 'Empire Access' + 'name': 'Empire Access', }, 'endeavor': { - 'name': 'Endeavor Communications' + 'name': 'Endeavor Communications', }, 'sun045': { - 'name': 'Enhanced 
Telecommunications Corporation' + 'name': 'Enhanced Telecommunications Corporation', }, 'mid030': { - 'name': 'enTouch' + 'name': 'enTouch', }, 'epb020': { - 'name': 'EPB Smartnet' + 'name': 'EPB Smartnet', }, 'jea010': { - 'name': 'EPlus Broadband' + 'name': 'EPlus Broadband', }, 'com065': { - 'name': 'ETC' + 'name': 'ETC', }, 'ete010': { - 'name': 'Etex Communications' + 'name': 'Etex Communications', }, 'fbc-tele': { - 'name': 'F&B Communications' + 'name': 'F&B Communications', }, 'fal010': { - 'name': 'Falcon Broadband' + 'name': 'Falcon Broadband', }, 'fam010': { - 'name': 'FamilyView CableVision' + 'name': 'FamilyView CableVision', }, 'far020': { - 'name': 'Farmers Mutual Telephone Company' + 'name': 'Farmers Mutual Telephone Company', }, 'fay010': { - 'name': 'Fayetteville Public Utilities' + 'name': 'Fayetteville Public Utilities', }, 'sal060': { - 'name': 'fibrant' + 'name': 'fibrant', }, 'fid010': { - 'name': 'Fidelity Communications' + 'name': 'Fidelity Communications', }, 'for030': { - 'name': 'FJ Communications' + 'name': 'FJ Communications', }, 'fli020': { - 'name': 'Flint River Communications' + 'name': 'Flint River Communications', }, 'far030': { - 'name': 'FMT - Jesup' + 'name': 'FMT - Jesup', }, 'foo010': { - 'name': 'Foothills Communications' + 'name': 'Foothills Communications', }, 'for080': { - 'name': 'Forsyth CableNet' + 'name': 'Forsyth CableNet', }, 'fbcomm': { - 'name': 'Frankfort Plant Board' + 'name': 'Frankfort Plant Board', }, 'tel160-fra': { - 'name': 'Franklin Telephone Company' + 'name': 'Franklin Telephone Company', }, 'nttcftc010': { - 'name': 'FTC' + 'name': 'FTC', }, 'fullchannel': { - 'name': 'Full Channel, Inc.' + 'name': 'Full Channel, Inc.', }, 'gar040': { - 'name': 'Gardonville Cooperative Telephone Association' + 'name': 'Gardonville Cooperative Telephone Association', }, 'gbt010': { - 'name': 'GBT Communications, Inc.' 
+ 'name': 'GBT Communications, Inc.', }, 'tec010': { - 'name': 'Genuine Telecom' + 'name': 'Genuine Telecom', }, 'clr010': { - 'name': 'Giant Communications' + 'name': 'Giant Communications', }, 'gla010': { - 'name': 'Glasgow EPB' + 'name': 'Glasgow EPB', }, 'gle010': { - 'name': 'Glenwood Telecommunications' + 'name': 'Glenwood Telecommunications', }, 'gra060': { - 'name': 'GLW Broadband Inc.' + 'name': 'GLW Broadband Inc.', }, 'goldenwest': { - 'name': 'Golden West Cablevision' + 'name': 'Golden West Cablevision', }, 'vis030': { - 'name': 'Grantsburg Telcom' + 'name': 'Grantsburg Telcom', }, 'gpcom': { - 'name': 'Great Plains Communications' + 'name': 'Great Plains Communications', }, 'gri010': { - 'name': 'Gridley Cable Inc' + 'name': 'Gridley Cable Inc', }, 'hbc010': { - 'name': 'H&B Cable Services' + 'name': 'H&B Cable Services', }, 'hae010': { - 'name': 'Haefele TV Inc.' + 'name': 'Haefele TV Inc.', }, 'htc010': { - 'name': 'Halstad Telephone Company' + 'name': 'Halstad Telephone Company', }, 'har005': { - 'name': 'Harlan Municipal Utilities' + 'name': 'Harlan Municipal Utilities', }, 'har020': { - 'name': 'Hart Communications' + 'name': 'Hart Communications', }, 'ced010': { - 'name': 'Hartelco TV' + 'name': 'Hartelco TV', }, 'hea040': { - 'name': 'Heart of Iowa Communications Cooperative' + 'name': 'Heart of Iowa Communications Cooperative', }, 'htc020': { - 'name': 'Hickory Telephone Company' + 'name': 'Hickory Telephone Company', }, 'nttchig010': { - 'name': 'Highland Communication Services' + 'name': 'Highland Communication Services', }, 'hig030': { - 'name': 'Highland Media' + 'name': 'Highland Media', }, 'spc010': { - 'name': 'Hilliary Communications' + 'name': 'Hilliary Communications', }, 'hin020': { - 'name': 'Hinton CATV Co.' + 'name': 'Hinton CATV Co.', }, 'hometel': { - 'name': 'HomeTel Entertainment, Inc.' 
+ 'name': 'HomeTel Entertainment, Inc.', }, 'hoodcanal': { - 'name': 'Hood Canal Communications' + 'name': 'Hood Canal Communications', }, 'weh010-hope': { - 'name': 'Hope - Prescott Cable TV' + 'name': 'Hope - Prescott Cable TV', }, 'horizoncable': { - 'name': 'Horizon Cable TV, Inc.' + 'name': 'Horizon Cable TV, Inc.', }, 'hor040': { - 'name': 'Horizon Chillicothe Telephone' + 'name': 'Horizon Chillicothe Telephone', }, 'htc030': { - 'name': 'HTC Communications Co. - IL' + 'name': 'HTC Communications Co. - IL', }, 'htccomm': { - 'name': 'HTC Communications, Inc. - IA' + 'name': 'HTC Communications, Inc. - IA', }, 'wal005': { - 'name': 'Huxley Communications' + 'name': 'Huxley Communications', }, 'imon': { - 'name': 'ImOn Communications' + 'name': 'ImOn Communications', }, 'ind040': { - 'name': 'Independence Telecommunications' + 'name': 'Independence Telecommunications', }, 'rrc010': { - 'name': 'Inland Networks' + 'name': 'Inland Networks', }, 'stc020': { - 'name': 'Innovative Cable TV St Croix' + 'name': 'Innovative Cable TV St Croix', }, 'car100': { - 'name': 'Innovative Cable TV St Thomas-St John' + 'name': 'Innovative Cable TV St Thomas-St John', }, 'icc010': { - 'name': 'Inside Connect Cable' + 'name': 'Inside Connect Cable', }, 'int100': { - 'name': 'Integra Telecom' + 'name': 'Integra Telecom', }, 'int050': { - 'name': 'Interstate Telecommunications Coop' + 'name': 'Interstate Telecommunications Coop', }, 'irv010': { - 'name': 'Irvine Cable' + 'name': 'Irvine Cable', }, 'k2c010': { - 'name': 'K2 Communications' + 'name': 'K2 Communications', }, 'kal010': { - 'name': 'Kalida Telephone Company, Inc.' + 'name': 'Kalida Telephone Company, Inc.', }, 'kal030': { - 'name': 'Kalona Cooperative Telephone Company' + 'name': 'Kalona Cooperative Telephone Company', }, 'kmt010': { - 'name': 'KMTelecom' + 'name': 'KMTelecom', }, 'kpu010': { - 'name': 'KPU Telecommunications' + 'name': 'KPU Telecommunications', }, 'kuh010': { - 'name': 'Kuhn Communications, Inc.' 
+ 'name': 'Kuhn Communications, Inc.', }, 'lak130': { - 'name': 'Lakeland Communications' + 'name': 'Lakeland Communications', }, 'lan010': { - 'name': 'Langco' + 'name': 'Langco', }, 'lau020': { - 'name': 'Laurel Highland Total Communications, Inc.' + 'name': 'Laurel Highland Total Communications, Inc.', }, 'leh010': { - 'name': 'Lehigh Valley Cooperative Telephone' + 'name': 'Lehigh Valley Cooperative Telephone', }, 'bra010': { - 'name': 'Limestone Cable/Bracken Cable' + 'name': 'Limestone Cable/Bracken Cable', }, 'loc020': { - 'name': 'LISCO' + 'name': 'LISCO', }, 'lit020': { - 'name': 'Litestream' + 'name': 'Litestream', }, 'tel140': { - 'name': 'LivCom' + 'name': 'LivCom', }, 'loc010': { - 'name': 'LocalTel Communications' + 'name': 'LocalTel Communications', }, 'weh010-longview': { - 'name': 'Longview - Kilgore Cable TV' + 'name': 'Longview - Kilgore Cable TV', }, 'lon030': { - 'name': 'Lonsdale Video Ventures, LLC' + 'name': 'Lonsdale Video Ventures, LLC', }, 'lns010': { - 'name': 'Lost Nation-Elwood Telephone Co.' + 'name': 'Lost Nation-Elwood Telephone Co.', }, 'nttclpc010': { - 'name': 'LPC Connect' + 'name': 'LPC Connect', }, 'lumos': { - 'name': 'Lumos Networks' + 'name': 'Lumos Networks', }, 'madison': { - 'name': 'Madison Communications' + 'name': 'Madison Communications', }, 'mad030': { - 'name': 'Madison County Cable Inc.' + 'name': 'Madison County Cable Inc.', }, 'nttcmah010': { - 'name': 'Mahaska Communication Group' + 'name': 'Mahaska Communication Group', }, 'mar010': { - 'name': 'Marne & Elk Horn Telephone Company' + 'name': 'Marne & Elk Horn Telephone Company', }, 'mcc040': { - 'name': 'McClure Telephone Co.' + 'name': 'McClure Telephone Co.', }, 'mctv': { - 'name': 'MCTV' + 'name': 'MCTV', }, 'merrimac': { - 'name': 'Merrimac Communications Ltd.' 
+ 'name': 'Merrimac Communications Ltd.', }, 'metronet': { - 'name': 'Metronet' + 'name': 'Metronet', }, 'mhtc': { - 'name': 'MHTC' + 'name': 'MHTC', }, 'midhudson': { - 'name': 'Mid-Hudson Cable' + 'name': 'Mid-Hudson Cable', }, 'midrivers': { - 'name': 'Mid-Rivers Communications' + 'name': 'Mid-Rivers Communications', }, 'mid045': { - 'name': 'Midstate Communications' + 'name': 'Midstate Communications', }, 'mil080': { - 'name': 'Milford Communications' + 'name': 'Milford Communications', }, 'min030': { - 'name': 'MINET' + 'name': 'MINET', }, 'nttcmin010': { - 'name': 'Minford TV' + 'name': 'Minford TV', }, 'san040-02': { - 'name': 'Mitchell Telecom' + 'name': 'Mitchell Telecom', }, 'mlg010': { - 'name': 'MLGC' + 'name': 'MLGC', }, 'mon060': { - 'name': 'Mon-Cre TVE' + 'name': 'Mon-Cre TVE', }, 'mou110': { - 'name': 'Mountain Telephone' + 'name': 'Mountain Telephone', }, 'mou050': { - 'name': 'Mountain Village Cable' + 'name': 'Mountain Village Cable', }, 'mtacomm': { - 'name': 'MTA Communications, LLC' + 'name': 'MTA Communications, LLC', }, 'mtc010': { - 'name': 'MTC Cable' + 'name': 'MTC Cable', }, 'med040': { - 'name': 'MTC Technologies' + 'name': 'MTC Technologies', }, 'man060': { - 'name': 'MTCC' + 'name': 'MTCC', }, 'mtc030': { - 'name': 'MTCO Communications' + 'name': 'MTCO Communications', }, 'mul050': { - 'name': 'Mulberry Telecommunications' + 'name': 'Mulberry Telecommunications', }, 'mur010': { - 'name': 'Murray Electric System' + 'name': 'Murray Electric System', }, 'musfiber': { - 'name': 'MUS FiberNET' + 'name': 'MUS FiberNET', }, 'mpw': { - 'name': 'Muscatine Power & Water' + 'name': 'Muscatine Power & Water', }, 'nttcsli010': { - 'name': 'myEVTV.com' + 'name': 'myEVTV.com', }, 'nor115': { - 'name': 'NCC' + 'name': 'NCC', }, 'nor260': { - 'name': 'NDTC' + 'name': 'NDTC', }, 'nctc': { - 'name': 'Nebraska Central Telecom, Inc.' 
+ 'name': 'Nebraska Central Telecom, Inc.', }, 'nel020': { - 'name': 'Nelsonville TV Cable' + 'name': 'Nelsonville TV Cable', }, 'nem010': { - 'name': 'Nemont' + 'name': 'Nemont', }, 'new075': { - 'name': 'New Hope Telephone Cooperative' + 'name': 'New Hope Telephone Cooperative', }, 'nor240': { - 'name': 'NICP' + 'name': 'NICP', }, 'cic010': { - 'name': 'NineStar Connect' + 'name': 'NineStar Connect', }, 'nktelco': { - 'name': 'NKTelco' + 'name': 'NKTelco', }, 'nortex': { - 'name': 'Nortex Communications' + 'name': 'Nortex Communications', }, 'nor140': { - 'name': 'North Central Telephone Cooperative' + 'name': 'North Central Telephone Cooperative', }, 'nor030': { - 'name': 'Northland Communications' + 'name': 'Northland Communications', }, 'nor075': { - 'name': 'Northwest Communications' + 'name': 'Northwest Communications', }, 'nor125': { - 'name': 'Norwood Light Broadband' + 'name': 'Norwood Light Broadband', }, 'net010': { - 'name': 'Nsight Telservices' + 'name': 'Nsight Telservices', }, 'dur010': { - 'name': 'Ntec' + 'name': 'Ntec', }, 'nts010': { - 'name': 'NTS Communications' + 'name': 'NTS Communications', }, 'new045': { - 'name': 'NU-Telecom' + 'name': 'NU-Telecom', }, 'nulink': { - 'name': 'NuLink' + 'name': 'NuLink', }, 'jam030': { - 'name': 'NVC' + 'name': 'NVC', }, 'far035': { - 'name': 'OmniTel Communications' + 'name': 'OmniTel Communications', }, 'onesource': { - 'name': 'OneSource Communications' + 'name': 'OneSource Communications', }, 'cit230': { - 'name': 'Opelika Power Services' + 'name': 'Opelika Power Services', }, 'daltonutilities': { - 'name': 'OptiLink' + 'name': 'OptiLink', }, 'mid140': { - 'name': 'OPTURA' + 'name': 'OPTURA', }, 'ote010': { - 'name': 'OTEC Communication Company' + 'name': 'OTEC Communication Company', }, 'cci020': { - 'name': 'Packerland Broadband' + 'name': 'Packerland Broadband', }, 'pan010': { - 'name': 'Panora Telco/Guthrie Center Communications' + 'name': 'Panora Telco/Guthrie Center Communications', }, 'otter': { 
- 'name': 'Park Region Telephone & Otter Tail Telcom' + 'name': 'Park Region Telephone & Otter Tail Telcom', }, 'mid050': { - 'name': 'Partner Communications Cooperative' + 'name': 'Partner Communications Cooperative', }, 'fib010': { - 'name': 'Pathway' + 'name': 'Pathway', }, 'paulbunyan': { - 'name': 'Paul Bunyan Communications' + 'name': 'Paul Bunyan Communications', }, 'pem020': { - 'name': 'Pembroke Telephone Company' + 'name': 'Pembroke Telephone Company', }, 'mck010': { - 'name': 'Peoples Rural Telephone Cooperative' + 'name': 'Peoples Rural Telephone Cooperative', }, 'pul010': { - 'name': 'PES Energize' + 'name': 'PES Energize', }, 'phi010': { - 'name': 'Philippi Communications System' + 'name': 'Philippi Communications System', }, 'phonoscope': { - 'name': 'Phonoscope Cable' + 'name': 'Phonoscope Cable', }, 'pin070': { - 'name': 'Pine Belt Communications, Inc.' + 'name': 'Pine Belt Communications, Inc.', }, 'weh010-pine': { - 'name': 'Pine Bluff Cable TV' + 'name': 'Pine Bluff Cable TV', }, 'pin060': { - 'name': 'Pineland Telephone Cooperative' + 'name': 'Pineland Telephone Cooperative', }, 'cam010': { - 'name': 'Pinpoint Communications' + 'name': 'Pinpoint Communications', }, 'pio060': { - 'name': 'Pioneer Broadband' + 'name': 'Pioneer Broadband', }, 'pioncomm': { - 'name': 'Pioneer Communications' + 'name': 'Pioneer Communications', }, 'pioneer': { - 'name': 'Pioneer DTV' + 'name': 'Pioneer DTV', }, 'pla020': { - 'name': 'Plant TiftNet, Inc.' + 'name': 'Plant TiftNet, Inc.', }, 'par010': { - 'name': 'PLWC' + 'name': 'PLWC', }, 'pro035': { - 'name': 'PMT' + 'name': 'PMT', }, 'vik011': { - 'name': 'Polar Cablevision' + 'name': 'Polar Cablevision', }, 'pottawatomie': { - 'name': 'Pottawatomie Telephone Co.' 
+ 'name': 'Pottawatomie Telephone Co.', }, 'premiercomm': { - 'name': 'Premier Communications' + 'name': 'Premier Communications', }, 'psc010': { - 'name': 'PSC' + 'name': 'PSC', }, 'pan020': { - 'name': 'PTCI' + 'name': 'PTCI', }, 'qco010': { - 'name': 'QCOL' + 'name': 'QCOL', }, 'qua010': { - 'name': 'Quality Cablevision' + 'name': 'Quality Cablevision', }, 'rad010': { - 'name': 'Radcliffe Telephone Company' + 'name': 'Radcliffe Telephone Company', }, 'car040': { - 'name': 'Rainbow Communications' + 'name': 'Rainbow Communications', }, 'rai030': { - 'name': 'Rainier Connect' + 'name': 'Rainier Connect', }, 'ral010': { - 'name': 'Ralls Technologies' + 'name': 'Ralls Technologies', }, 'rct010': { - 'name': 'RC Technologies' + 'name': 'RC Technologies', }, 'red040': { - 'name': 'Red River Communications' + 'name': 'Red River Communications', }, 'ree010': { - 'name': 'Reedsburg Utility Commission' + 'name': 'Reedsburg Utility Commission', }, 'mol010': { - 'name': 'Reliance Connects- Oregon' + 'name': 'Reliance Connects- Oregon', }, 'res020': { - 'name': 'Reserve Telecommunications' + 'name': 'Reserve Telecommunications', }, 'weh010-resort': { - 'name': 'Resort TV Cable' + 'name': 'Resort TV Cable', }, 'rld010': { - 'name': 'Richland Grant Telephone Cooperative, Inc.' + 'name': 'Richland Grant Telephone Cooperative, Inc.', }, 'riv030': { - 'name': 'River Valley Telecommunications Coop' + 'name': 'River Valley Telecommunications Coop', }, 'rockportcable': { - 'name': 'Rock Port Cablevision' + 'name': 'Rock Port Cablevision', }, 'rsf010': { - 'name': 'RS Fiber' + 'name': 'RS Fiber', }, 'rtc': { - 'name': 'RTC Communication Corp' + 'name': 'RTC Communication Corp', }, 'res040': { - 'name': 'RTC-Reservation Telephone Coop.' 
+ 'name': 'RTC-Reservation Telephone Coop.', }, 'rte010': { - 'name': 'RTEC Communications' + 'name': 'RTEC Communications', }, 'stc010': { - 'name': 'S&T' + 'name': 'S&T', }, 'san020': { - 'name': 'San Bruno Cable TV' + 'name': 'San Bruno Cable TV', }, 'san040-01': { - 'name': 'Santel' + 'name': 'Santel', }, 'sav010': { - 'name': 'SCI Broadband-Savage Communications Inc.' + 'name': 'SCI Broadband-Savage Communications Inc.', }, 'sco050': { - 'name': 'Scottsboro Electric Power Board' + 'name': 'Scottsboro Electric Power Board', }, 'scr010': { - 'name': 'Scranton Telephone Company' + 'name': 'Scranton Telephone Company', }, 'selco': { - 'name': 'SELCO' + 'name': 'SELCO', }, 'she010': { - 'name': 'Shentel' + 'name': 'Shentel', }, 'she030': { - 'name': 'Sherwood Mutual Telephone Association, Inc.' + 'name': 'Sherwood Mutual Telephone Association, Inc.', }, 'ind060-ssc': { - 'name': 'Silver Star Communications' + 'name': 'Silver Star Communications', }, 'sjoberg': { - 'name': 'Sjoberg\'s Inc.' + 'name': 'Sjoberg\'s Inc.', }, 'sou025': { - 'name': 'SKT' + 'name': 'SKT', }, 'sky050': { - 'name': 'SkyBest TV' + 'name': 'SkyBest TV', }, 'nttcsmi010': { - 'name': 'Smithville Communications' + 'name': 'Smithville Communications', }, 'woo010': { - 'name': 'Solarus' + 'name': 'Solarus', }, 'sou075': { - 'name': 'South Central Rural Telephone Cooperative' + 'name': 'South Central Rural Telephone Cooperative', }, 'sou065': { - 'name': 'South Holt Cablevision, Inc.' + 'name': 'South Holt Cablevision, Inc.', }, 'sou035': { - 'name': 'South Slope Cooperative Communications' + 'name': 'South Slope Cooperative Communications', }, 'spa020': { - 'name': 'Spanish Fork Community Network' + 'name': 'Spanish Fork Community Network', }, 'spe010': { - 'name': 'Spencer Municipal Utilities' + 'name': 'Spencer Municipal Utilities', }, 'spi005': { - 'name': 'Spillway Communications, Inc.' 
+ 'name': 'Spillway Communications, Inc.', }, 'srt010': { - 'name': 'SRT' + 'name': 'SRT', }, 'cccsmc010': { - 'name': 'St. Maarten Cable TV' + 'name': 'St. Maarten Cable TV', }, 'sta025': { - 'name': 'Star Communications' + 'name': 'Star Communications', }, 'sco020': { - 'name': 'STE' + 'name': 'STE', }, 'uin010': { - 'name': 'STRATA Networks' + 'name': 'STRATA Networks', }, 'sum010': { - 'name': 'Sumner Cable TV' + 'name': 'Sumner Cable TV', }, 'pie010': { - 'name': 'Surry TV/PCSI TV' + 'name': 'Surry TV/PCSI TV', }, 'swa010': { - 'name': 'Swayzee Communications' + 'name': 'Swayzee Communications', }, 'sweetwater': { - 'name': 'Sweetwater Cable Television Co' + 'name': 'Sweetwater Cable Television Co', }, 'weh010-talequah': { - 'name': 'Tahlequah Cable TV' + 'name': 'Tahlequah Cable TV', }, 'tct': { - 'name': 'TCT' + 'name': 'TCT', }, 'tel050': { - 'name': 'Tele-Media Company' + 'name': 'Tele-Media Company', }, 'com050': { - 'name': 'The Community Agency' + 'name': 'The Community Agency', }, 'thr020': { - 'name': 'Three River' + 'name': 'Three River', }, 'cab140': { - 'name': 'Town & Country Technologies' + 'name': 'Town & Country Technologies', }, 'tra010': { - 'name': 'Trans-Video' + 'name': 'Trans-Video', }, 'tre010': { - 'name': 'Trenton TV Cable Company' + 'name': 'Trenton TV Cable Company', }, 'tcc': { - 'name': 'Tri County Communications Cooperative' + 'name': 'Tri County Communications Cooperative', }, 'tri025': { - 'name': 'TriCounty Telecom' + 'name': 'TriCounty Telecom', }, 'tri110': { - 'name': 'TrioTel Communications, Inc.' + 'name': 'TrioTel Communications, Inc.', }, 'tro010': { - 'name': 'Troy Cablevision, Inc.' 
+ 'name': 'Troy Cablevision, Inc.', }, 'tsc': { - 'name': 'TSC' + 'name': 'TSC', }, 'cit220': { - 'name': 'Tullahoma Utilities Board' + 'name': 'Tullahoma Utilities Board', }, 'tvc030': { - 'name': 'TV Cable of Rensselaer' + 'name': 'TV Cable of Rensselaer', }, 'tvc015': { - 'name': 'TVC Cable' + 'name': 'TVC Cable', }, 'cab180': { - 'name': 'TVision' + 'name': 'TVision', }, 'twi040': { - 'name': 'Twin Lakes' + 'name': 'Twin Lakes', }, 'tvtinc': { - 'name': 'Twin Valley' + 'name': 'Twin Valley', }, 'uis010': { - 'name': 'Union Telephone Company' + 'name': 'Union Telephone Company', }, 'uni110': { - 'name': 'United Communications - TN' + 'name': 'United Communications - TN', }, 'uni120': { - 'name': 'United Services' + 'name': 'United Services', }, 'uss020': { - 'name': 'US Sonet' + 'name': 'US Sonet', }, 'cab060': { - 'name': 'USA Communications' + 'name': 'USA Communications', }, 'she005': { - 'name': 'USA Communications/Shellsburg, IA' + 'name': 'USA Communications/Shellsburg, IA', }, 'val040': { - 'name': 'Valley TeleCom Group' + 'name': 'Valley TeleCom Group', }, 'val025': { - 'name': 'Valley Telecommunications' + 'name': 'Valley Telecommunications', }, 'val030': { - 'name': 'Valparaiso Broadband' + 'name': 'Valparaiso Broadband', }, 'cla050': { - 'name': 'Vast Broadband' + 'name': 'Vast Broadband', }, 'sul015': { - 'name': 'Venture Communications Cooperative, Inc.' + 'name': 'Venture Communications Cooperative, Inc.', }, 'ver025': { - 'name': 'Vernon Communications Co-op' + 'name': 'Vernon Communications Co-op', }, 'weh010-vicksburg': { - 'name': 'Vicksburg Video' + 'name': 'Vicksburg Video', }, 'vis070': { - 'name': 'Vision Communications' + 'name': 'Vision Communications', }, 'volcanotel': { - 'name': 'Volcano Vision, Inc.' 
+ 'name': 'Volcano Vision, Inc.', }, 'vol040-02': { - 'name': 'VolFirst / BLTV' + 'name': 'VolFirst / BLTV', }, 'ver070': { - 'name': 'VTel' + 'name': 'VTel', }, 'nttcvtx010': { - 'name': 'VTX1' + 'name': 'VTX1', }, 'bci010-02': { - 'name': 'Vyve Broadband' + 'name': 'Vyve Broadband', }, 'wab020': { - 'name': 'Wabash Mutual Telephone' + 'name': 'Wabash Mutual Telephone', }, 'waitsfield': { - 'name': 'Waitsfield Cable' + 'name': 'Waitsfield Cable', }, 'wal010': { - 'name': 'Walnut Communications' + 'name': 'Walnut Communications', }, 'wavebroadband': { - 'name': 'Wave' + 'name': 'Wave', }, 'wav030': { - 'name': 'Waverly Communications Utility' + 'name': 'Waverly Communications Utility', }, 'wbi010': { - 'name': 'WBI' + 'name': 'WBI', }, 'web020': { - 'name': 'Webster-Calhoun Cooperative Telephone Association' + 'name': 'Webster-Calhoun Cooperative Telephone Association', }, 'wes005': { - 'name': 'West Alabama TV Cable' + 'name': 'West Alabama TV Cable', }, 'carolinata': { - 'name': 'West Carolina Communications' + 'name': 'West Carolina Communications', }, 'wct010': { - 'name': 'West Central Telephone Association' + 'name': 'West Central Telephone Association', }, 'wes110': { - 'name': 'West River Cooperative Telephone Company' + 'name': 'West River Cooperative Telephone Company', }, 'ani030': { - 'name': 'WesTel Systems' + 'name': 'WesTel Systems', }, 'westianet': { - 'name': 'Western Iowa Networks' + 'name': 'Western Iowa Networks', }, 'nttcwhi010': { - 'name': 'Whidbey Telecom' + 'name': 'Whidbey Telecom', }, 'weh010-white': { - 'name': 'White County Cable TV' + 'name': 'White County Cable TV', }, 'wes130': { - 'name': 'Wiatel' + 'name': 'Wiatel', }, 'wik010': { - 'name': 'Wiktel' + 'name': 'Wiktel', }, 'wil070': { - 'name': 'Wilkes Communications, Inc./RiverStreet Networks' + 'name': 'Wilkes Communications, Inc./RiverStreet Networks', }, 'wil015': { - 'name': 'Wilson Communications' + 'name': 'Wilson Communications', }, 'win010': { - 'name': 'Windomnet/SMBS' + 
'name': 'Windomnet/SMBS', }, 'win090': { - 'name': 'Windstream Cable TV' + 'name': 'Windstream Cable TV', }, 'wcta': { - 'name': 'Winnebago Cooperative Telecom Association' + 'name': 'Winnebago Cooperative Telecom Association', }, 'wtc010': { - 'name': 'WTC' + 'name': 'WTC', }, 'wil040': { - 'name': 'WTC Communications, Inc.' + 'name': 'WTC Communications, Inc.', }, 'wya010': { - 'name': 'Wyandotte Cable' + 'name': 'Wyandotte Cable', }, 'hin020-02': { - 'name': 'X-Stream Services' + 'name': 'X-Stream Services', }, 'xit010': { - 'name': 'XIT Communications' + 'name': 'XIT Communications', }, 'yel010': { - 'name': 'Yelcot Communications' + 'name': 'Yelcot Communications', }, 'mid180-01': { - 'name': 'yondoo' + 'name': 'yondoo', }, 'cou060': { - 'name': 'Zito Media' + 'name': 'Zito Media', }, 'slingtv': { 'name': 'Sling TV', @@ -1363,7 +1363,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en headers = self.geo_verification_headers() headers.update(kwargs.get('headers', {})) kwargs['headers'] = headers - return super(AdobePassIE, self)._download_webpage_handle( + return super()._download_webpage_handle( *args, **kwargs) @staticmethod @@ -1384,7 +1384,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): def xml_text(xml_str, tag): return self._search_regex( - '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag) + f'<{tag}>(.+?)</{tag}>', xml_str, tag) def is_expired(token, date_ele): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) @@ -1394,7 +1394,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en form_page, urlh = form_page_res post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url') if not re.match(r'https?://', post_url): - post_url = compat_urlparse.urljoin(urlh.url, post_url) + post_url = 
urllib.parse.urljoin(urlh.url, post_url) form_data = self._hidden_inputs(form_page) form_data.update(data) return self._download_webpage_handle( @@ -1414,13 +1414,13 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' redirect_url = self._search_regex( r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' - r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX, + rf'(?:[a-z-]+="[^"]+"\s+)*?content="{REDIRECT_REGEX}', html, 'meta refresh redirect', default=NO_DEFAULT if fatal else None, fatal=fatal) if not redirect_url: return None if url: - redirect_url = compat_urlparse.urljoin(url, unescapeHTML(redirect_url)) + redirect_url = urllib.parse.urljoin(url, unescapeHTML(redirect_url)) return redirect_url mvpd_headers = { @@ -1506,12 +1506,12 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en 'ident': username, 'device': 'web', 'send_confirm_link': False, - 'send_token': True + 'send_token': True, })) philo_code = getpass.getpass('Type auth code you have received [Return]: ') self._download_webpage( 'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({ - 'token': philo_code + 'token': philo_code, })) mvpd_confirm_page_res = self._download_webpage_handle('https://idp.philo.com/idp/submit', video_id, 'Confirming Philo Login') post_form(mvpd_confirm_page_res, 'Confirming Login') @@ -1569,9 +1569,9 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en saml_response_json['targetValue'], video_id, 'Confirming Login', data=urlencode_postdata({ 'SAMLResponse': saml_response_json['SAMLResponse'], - 'RelayState': saml_response_json['RelayState'] + 'RelayState': saml_response_json['RelayState'], }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }) elif mso_id in ('Spectrum', 'Charter_Direct'): # 
Spectrum's login for is dynamically loaded via JS so we need to hardcode the flow @@ -1606,7 +1606,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en 'SAMLResponse': saml_response_json['SAMLResponse'], 'RelayState': relay_state, }), headers={ - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }) elif mso_id == 'slingtv': # SlingTV has a meta-refresh based authentication, but also @@ -1625,7 +1625,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en provider_association_redirect, urlh = post_form( provider_login_page_res, 'Logging in', { mso_info['username_field']: username, - mso_info['password_field']: password + mso_info['password_field']: password, }) provider_refresh_redirect_url = extract_redirect_url( @@ -1676,7 +1676,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en provider_association_redirect, urlh = post_form( provider_login_page_res, 'Logging in', { mso_info['username_field']: username, - mso_info['password_field']: password + mso_info['password_field']: password, }) provider_refresh_redirect_url = extract_redirect_url( @@ -1708,7 +1708,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE) form_data = { mso_info.get('username_field', 'username'): username, - mso_info.get('password_field', 'password'): password + mso_info.get('password_field', 'password'): password, } if mso_id in ('Cablevision', 'AlticeOne'): form_data['_eventId_proceed'] = '' diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index 08e9e51..4608e5c 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -2,7 +2,6 @@ import functools import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ISO639Utils, OnDemandPagedList, @@ -36,7 +35,7 @@ class 
AdobeTVBaseIE(InfoExtractor): return subtitles def _parse_video_data(self, video_data): - video_id = compat_str(video_data['id']) + video_id = str(video_data['id']) title = video_data['title'] s3_extracted = False @@ -151,7 +150,7 @@ class AdobeTVPlaylistBaseIE(AdobeTVBaseIE): page += 1 query['page'] = page for element_data in self._call_api( - self._RESOURCE, display_id, query, 'Download Page %d' % page): + self._RESOURCE, display_id, query, f'Download Page {page}'): yield self._process_data(element_data) def _extract_playlist_entries(self, display_id, query): diff --git a/yt_dlp/extractor/adultswim.py b/yt_dlp/extractor/adultswim.py index d807c41..2c83701 100644 --- a/yt_dlp/extractor/adultswim.py +++ b/yt_dlp/extractor/adultswim.py @@ -91,7 +91,7 @@ class AdultSwimIE(TurnerBaseIE): getShowBySlug(slug:"%s") { %%s } -}''' % show_path +}''' % show_path # noqa: UP031 if episode_path: query = query % '''title getVideoBySlug(slug:"%s") { @@ -128,7 +128,7 @@ class AdultSwimIE(TurnerBaseIE): episode_title = title = video_data['title'] series = show_data.get('title') if series: - title = '%s - %s' % (series, title) + title = f'{series} - {title}' info = { 'id': video_id, 'title': title, @@ -191,7 +191,7 @@ class AdultSwimIE(TurnerBaseIE): if not slug: continue entries.append(self.url_result( - 'http://adultswim.com/videos/%s/%s' % (show_path, slug), + f'http://adultswim.com/videos/{show_path}/{slug}', 'AdultSwim', video.get('_id'))) return self.playlist_result( entries, show_path, show_data.get('title'), diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py index ab4b6c0..8e25786 100644 --- a/yt_dlp/extractor/aenetworks.py +++ b/yt_dlp/extractor/aenetworks.py @@ -73,8 +73,8 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE def _extract_aetn_info(self, domain, filter_key, filter_value, url): requestor_id, brand = self._DOMAIN_MAP[domain] result = self._download_json( - 'https://feeds.video.aetnd.com/api/v2/%s/videos' 
% brand, - filter_value, query={'filter[%s]' % filter_key: filter_value}) + f'https://feeds.video.aetnd.com/api/v2/{brand}/videos', + filter_value, query={f'filter[{filter_key}]': filter_value}) result = traverse_obj( result, ('results', lambda k, v: k == 0 and v[filter_key] == filter_value), @@ -142,7 +142,7 @@ class AENetworksIE(AENetworksBaseIE): 'skip_download': True, }, 'add_ie': ['ThePlatform'], - 'skip': 'Geo-restricted - This content is not available in your location.' + 'skip': 'Geo-restricted - This content is not available in your location.', }, { 'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', 'info_dict': { @@ -171,28 +171,28 @@ class AENetworksIE(AENetworksBaseIE): 'skip': 'This video is only available for users of participating TV providers.', }, { 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', - 'only_matching': True + 'only_matching': True, }, { 'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6', - 'only_matching': True + 'only_matching': True, }, { 'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie', - 'only_matching': True + 'only_matching': True, }, { 'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story', - 'only_matching': True + 'only_matching': True, }, { 'url': 'http://www.history.com/videos/history-of-valentines-day', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -209,14 +209,14 @@ class 
AENetworksListBaseIE(AENetworksBaseIE): %s(slug: "%s") { %s } -}''' % (resource, slug, fields), +}''' % (resource, slug, fields), # noqa: UP031 }))['data'][resource] def _real_extract(self, url): domain, slug = self._match_valid_url(url).groups() _, brand = self._DOMAIN_MAP[domain] playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS) - base_url = 'http://watch.%s' % domain + base_url = f'http://watch.{domain}' entries = [] for item in (playlist.get(self._ITEMS_KEY) or []): @@ -248,10 +248,10 @@ class AENetworksCollectionIE(AENetworksListBaseIE): 'playlist_mincount': 12, }, { 'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.historyvault.com/collections/mysteryquest', - 'only_matching': True + 'only_matching': True, }] _RESOURCE = 'list' _ITEMS_KEY = 'items' @@ -309,7 +309,7 @@ class HistoryTopicIE(AENetworksBaseIE): 'info_dict': { 'id': '40700995724', 'ext': 'mp4', - 'title': "History of Valentine’s Day", + 'title': 'History of Valentine’s Day', 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', 'timestamp': 1375819729, 'upload_date': '20130806', @@ -364,6 +364,6 @@ class BiographyIE(AENetworksBaseIE): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) player_url = self._search_regex( - r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL, + rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})', webpage, 'player URL') return self.url_result(player_url, HistoryPlayerIE.ie_key()) diff --git a/yt_dlp/extractor/aeonco.py b/yt_dlp/extractor/aeonco.py index 390eae3..22d0266 100644 --- a/yt_dlp/extractor/aeonco.py +++ b/yt_dlp/extractor/aeonco.py @@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor): 'uploader': 'Semiconductor', 'uploader_id': 'semiconductor', 'uploader_url': 'https://vimeo.com/semiconductor', - 'duration': 348 - } + 'duration': 348, + }, }, { 'url': 
'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it', 'md5': '03582d795382e49f2fd0b427b55de409', @@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor): 'uploader': 'Aeon Video', 'uploader_id': 'aeonvideo', 'uploader_url': 'https://vimeo.com/aeonvideo', - 'duration': 1344 - } + 'duration': 1344, + }, }, { 'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out', 'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b', diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 3e5738f..f51b5a6 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -55,7 +55,7 @@ class AfreecaTVBaseIE(InfoExtractor): if result != 1: error = _ERRORS.get(result, 'You have failed to log in.') raise ExtractorError( - 'Unable to login: %s said: %s' % (self.IE_NAME, error), + f'Unable to login: {self.IE_NAME} said: {error}', expected=True) @@ -72,7 +72,7 @@ class AfreecaTVIE(AfreecaTVBaseIE): )\?.*?\bnTitleNo=| vod\.afreecatv\.com/(PLAYER/STATION|player)/ ) - (?P<id>\d+) + (?P<id>\d+)/?(?:$|[?#&]) ''' _TESTS = [{ 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', @@ -189,7 +189,7 @@ class AfreecaTVIE(AfreecaTVBaseIE): headers={'Referer': url}, data=urlencode_postdata({ 'nTitleNo': video_id, 'nApiLevel': 10, - }))['data'] + }), impersonate=True)['data'] error_code = traverse_obj(data, ('code', {int})) if error_code == -6221: @@ -227,7 +227,7 @@ class AfreecaTVIE(AfreecaTVBaseIE): **traverse_obj(file_element, { 'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('file_start', {unified_timestamp}), - }) + }), }) if traverse_obj(data, ('adult_status', {str})) == 'notLogin': @@ -253,6 +253,43 @@ class AfreecaTVIE(AfreecaTVBaseIE): return self.playlist_result(entries, video_id, multi_video=True, **common_info) +class 
AfreecaTVCatchStoryIE(AfreecaTVBaseIE): + IE_NAME = 'afreecatv:catchstory' + IE_DESC = 'afreecatv.com catch story' + _VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory' + _TESTS = [{ + 'url': 'https://vod.afreecatv.com/player/103247/catchstory', + 'info_dict': { + 'id': '103247', + }, + 'playlist_count': 2, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + data = self._download_json( + 'https://api.m.afreecatv.com/catchstory/a/view', video_id, headers={'Referer': url}, + query={'aStoryListIdx': '', 'nStoryIdx': video_id}, impersonate=True) + + return self.playlist_result(self._entries(data), video_id) + + @staticmethod + def _entries(data): + # 'files' is always a list with 1 element + yield from traverse_obj(data, ( + 'data', lambda _, v: v['story_type'] == 'catch', + 'catch_list', lambda _, v: v['files'][0]['file'], { + 'id': ('files', 0, 'file_info_key', {str}), + 'url': ('files', 0, 'file', {url_or_none}), + 'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}), + 'title': ('title', {str}), + 'uploader': ('writer_nick', {str}), + 'uploader_id': ('writer_id', {str}), + 'thumbnail': ('thumb', {url_or_none}), + 'timestamp': ('write_timestamp', {int_or_none}), + })) + + class AfreecaTVLiveIE(AfreecaTVBaseIE): IE_NAME = 'afreecatv:live' IE_DESC = 'afreecatv.com livestreams' diff --git a/yt_dlp/extractor/agora.py b/yt_dlp/extractor/agora.py index abb2d3f..9835584 100644 --- a/yt_dlp/extractor/agora.py +++ b/yt_dlp/extractor/agora.py @@ -168,7 +168,7 @@ class TokFMPodcastIE(InfoExtractor): for ext in ('aac', 'mp3'): url_data = self._download_json( f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}', - media_id, 'Downloading podcast %s URL' % ext) + media_id, f'Downloading podcast {ext} URL') # prevents inserting the mp3 (default) multiple times if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']: 
formats.append({ @@ -206,8 +206,8 @@ class TokFMAuditionIE(InfoExtractor): } @staticmethod - def _create_url(id): - return f'https://audycje.tokfm.pl/audycja/{id}' + def _create_url(video_id): + return f'https://audycje.tokfm.pl/audycja/{video_id}' def _real_extract(self, url): audition_id = self._match_id(url) diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py index 6cc63cd..cee660d 100644 --- a/yt_dlp/extractor/airtv.py +++ b/yt_dlp/extractor/airtv.py @@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor): 'view_count': int, 'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg', 'timestamp': 1664792603, - } + }, }, { # with youtube_id 'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q', @@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor): 'channel': 'Newsflare', 'duration': 37, 'upload_date': '20180511', - } + }, }] def _get_formats_and_subtitle(self, json_data, video_id): diff --git a/yt_dlp/extractor/aitube.py b/yt_dlp/extractor/aitube.py index 89a6450..5179b72 100644 --- a/yt_dlp/extractor/aitube.py +++ b/yt_dlp/extractor/aitube.py @@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor): 'timestamp': 1667370519, 'title': 'Ангел хранитель 1 серия', 'channel_follower_count': int, - } + }, }, { # embed url 'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c', diff --git a/yt_dlp/extractor/aliexpress.py b/yt_dlp/extractor/aliexpress.py index 2e83f2e..e8f8618 100644 --- a/yt_dlp/extractor/aliexpress.py +++ b/yt_dlp/extractor/aliexpress.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( float_or_none, try_get, @@ -44,7 +43,7 @@ class AliExpressLiveIE(InfoExtractor): 'title': title, 'thumbnail': data.get('coverUrl'), 'uploader': try_get( - data, lambda x: x['followBar']['name'], compat_str), + data, lambda x: x['followBar']['name'], str), 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), 
'formats': formats, } diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py index 124bab0..9715b49 100644 --- a/yt_dlp/extractor/aljazeera.py +++ b/yt_dlp/extractor/aljazeera.py @@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor): 'timestamp': 1636219149, 'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.', 'upload_date': '20211106', - } + }, }, { 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu', 'info_dict': { @@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor): BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P<account>\d+)/(?P<player_id>[a-zA-Z0-9]+)_(?P<embed>[^/]+)/index.html\?videoId=(?P<id>\d+)' def _real_extract(self, url): - base, post_type, id = self._match_valid_url(url).groups() + base, post_type, display_id = self._match_valid_url(url).groups() wp = { 'balkans.aljazeera.net': 'ajb', 'chinese.aljazeera.net': 'chinese', @@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor): 'news': 'news', }[post_type.split('/')[0]] video = self._download_json( - f'https://{base}/graphql', id, query={ + f'https://{base}/graphql', display_id, query={ 'wp-site': wp, 'operationName': 'ArchipelagoSingleArticleQuery', 'variables': json.dumps({ - 'name': id, + 'name': display_id, 'postType': post_type, }), }, headers={ @@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor): embed = 'default' if video_id is None: - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, display_id) account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id', group=(1, 2, 3, 4), default=(None, None, None, None)) @@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor): return { '_type': 'url_transparent', 'url': url, - 'ie_key': 'Generic' + 'ie_key': 'Generic', } return { '_type': 'url_transparent', 'url': 
f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}', - 'ie_key': 'BrightcoveNew' + 'ie_key': 'BrightcoveNew', } diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py index 2d342cf..e0859d4 100644 --- a/yt_dlp/extractor/allocine.py +++ b/yt_dlp/extractor/allocine.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, qualities, @@ -95,11 +94,11 @@ class AllocineIE(InfoExtractor): duration = int_or_none(video.get('duration')) view_count = int_or_none(video.get('view_count')) timestamp = unified_timestamp(try_get( - video, lambda x: x['added_at']['date'], compat_str)) + video, lambda x: x['added_at']['date'], str)) else: video_id = display_id media_data = self._download_json( - 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) + f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id) title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné')) for key, value in media_data['video'].items(): if not key.endswith('Path'): diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py index 49df4bf..5ea1c30 100644 --- a/yt_dlp/extractor/allstar.py +++ b/yt_dlp/extractor/allstar.py @@ -33,27 +33,27 @@ _QUERIES = { video: getClip(clipIdentifier: $id) { %s %s } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 'montage': '''query ($id: String!) { video: getMontage(clipIdentifier: $id) { %s } - }''' % _FIELDS, + }''' % _FIELDS, # noqa: UP031 'Clips': '''query ($page: Int!, $user: String!, $game: Int) { videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) { data { %s %s } } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 'Montages': '''query ($page: Int!, $user: String!) 
{ videos: montages(search: createdDate, page: $page, user: $user) { data { %s } } - }''' % _FIELDS, + }''' % _FIELDS, # noqa: UP031 'Mobile Clips': '''query ($page: Int!, $user: String!) { videos: clips(search: createdDate, page: $page, user: $user, mobile: true) { data { %s %s } } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 } @@ -121,7 +121,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230425', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/clip?clip=8LJLY4JKB', 'info_dict': { @@ -139,7 +139,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230702', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c', 'info_dict': { @@ -155,7 +155,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230418', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/montage?montage=RILJMH6QOS', 'info_dict': { @@ -171,7 +171,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230703', 'view_count': int, - } + }, }] def _real_extract(self, url): @@ -191,28 +191,28 @@ class AllstarProfileIE(AllstarBaseIE): 'id': '62b8bdfc9021052f7905882d-clips', 'title': 'cherokee - Clips', }, - 'playlist_mincount': 15 + 'playlist_mincount': 15, }, { 'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-clips-730', 'title': 'cherokee - Clips - 730', }, - 'playlist_mincount': 15 + 'playlist_mincount': 15, }, { 'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-montages', 'title': 'cherokee - Montages', }, - 'playlist_mincount': 4 + 'playlist_mincount': 4, }, { 'url': 
'https://allstar.gg/profile?user=cherokee&view=Mobile Clips', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-mobile', 'title': 'cherokee - Mobile Clips', }, - 'playlist_mincount': 1 + 'playlist_mincount': 1, }] _PAGE_SIZE = 10 diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py index f927965..7b74d55 100644 --- a/yt_dlp/extractor/alphaporno.py +++ b/yt_dlp/extractor/alphaporno.py @@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor): 'tbr': 1145, 'categories': list, 'age_limit': 18, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py index ea3332e..c315e4f 100644 --- a/yt_dlp/extractor/alsace20tv.py +++ b/yt_dlp/extractor/alsace20tv.py @@ -12,7 +12,7 @@ from ..utils import ( class Alsace20TVBaseIE(InfoExtractor): def _extract_video(self, video_id, url=None): info = self._download_json( - 'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ), + f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html', video_id) or {} title = info.get('titre') @@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor): else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)) webpage = (url and self._download_webpage(url, video_id, fatal=False)) or '' - thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage)) + thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage)) upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None) - upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None + upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None return { 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/altcensored.py b/yt_dlp/extractor/altcensored.py index 
6878918..bfbf6b6 100644 --- a/yt_dlp/extractor/altcensored.py +++ b/yt_dlp/extractor/altcensored.py @@ -34,7 +34,7 @@ class AltCensoredIE(InfoExtractor): 'thumbnail': 'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg', 'view_count': int, 'categories': ['News & Politics'], - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index cb2b989..ce03a42 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( ExtractorError, clean_html, @@ -21,7 +21,7 @@ class AluraIE(InfoExtractor): 'info_dict': { 'id': '60095', 'ext': 'mp4', - 'title': 'Referências, ref-set e alter' + 'title': 'Referências, ref-set e alter', }, 'skip': 'Requires alura account credentials'}, { @@ -30,7 +30,7 @@ class AluraIE(InfoExtractor): 'only_matching': True}, { 'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219', - 'only_matching': True} + 'only_matching': True}, ] def _real_extract(self, url): @@ -62,7 +62,7 @@ class AluraIE(InfoExtractor): return { 'id': video_id, 'title': video_title, - "formats": formats + 'formats': formats, } def _perform_login(self, username, password): @@ -91,7 +91,7 @@ class AluraIE(InfoExtractor): 'post url', default=self._LOGIN_URL, group='url') if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( post_url, None, 'Logging in', @@ -103,7 +103,7 @@ class AluraIE(InfoExtractor): r'(?s)<p[^>]+class="alert-message[^"]*">(.+?)</p>', response, 'error message', default=None) if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError(f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log 
in') @@ -119,7 +119,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE @classmethod def suitable(cls, url): - return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url) + return False if AluraIE.suitable(url) else super().suitable(url) def _real_extract(self, url): @@ -157,7 +157,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE 'url': video_url, 'id_key': self.ie_key(), 'chapter': chapter, - 'chapter_number': chapter_number + 'chapter_number': chapter_number, } entries.append(entry) return self.playlist_result(entries, course_path, course_title) diff --git a/yt_dlp/extractor/amadeustv.py b/yt_dlp/extractor/amadeustv.py index 2f5ca91..f4ea04e 100644 --- a/yt_dlp/extractor/amadeustv.py +++ b/yt_dlp/extractor/amadeustv.py @@ -24,7 +24,7 @@ class AmadeusTVIE(InfoExtractor): 'display_id': '65091a87ff85af59d9fc54c3', 'view_count': int, 'description': 'md5:a0357b9c215489e2067cbae0b777bb95', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py index 509b21a..ed0f0cd 100644 --- a/yt_dlp/extractor/amara.py +++ b/yt_dlp/extractor/amara.py @@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor): 'uploader': 'PBS NewsHour', 'uploader_id': 'PBSNewsHour', 'timestamp': 1549639570, - } + }, }, { # Vimeo 'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011', @@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor): 'timestamp': 1294763658, 'upload_date': '20110111', 'uploader': 'Sam Morrill', - 'uploader_id': 'sammorrill' - } + 'uploader_id': 'sammorrill', + }, }, { # Direct Link 'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/', @@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor): 'subtitles': dict, 'upload_date': '20091007', 'timestamp': 1254942511, - } + }, }] def _real_extract(self, url): video_id = self._match_id(url) meta = self._download_json( - 'https://amara.org/api/videos/%s/' % video_id, + 
f'https://amara.org/api/videos/{video_id}/', video_id, query={'format': 'json'}) title = meta['title'] video_url = meta['all_urls'][0] diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index a03f983..d1b9166 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor): }] def _real_extract(self, url): - id = self._match_id(url) + playlist_id = self._match_id(url) for retry in self.RetryManager(): - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, playlist_id) try: data_json = self._search_json( - r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id, + r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id, transform_source=js_to_json) except ExtractorError as e: retry.error = e @@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor): 'height': int_or_none(video.get('videoHeight')), 'width': int_or_none(video.get('videoWidth')), } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] - return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title')) + return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title')) class AmazonReviewsIE(InfoExtractor): diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 2c71c5e..0590a34 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -25,7 +25,7 @@ class AmazonMiniTVBaseIE(InfoExtractor): asin, note=note, headers={ 'Content-Type': 'application/json', 'currentpageurl': '/', - 'currentplatform': 'dWeb' + 'currentplatform': 'dWeb', }, data=json.dumps(data).encode() if data else None, query=None if data else { 'deviceType': 'A1WMMUXPCUJL4N', diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py index 10bd021..15a86e2 100644 --- a/yt_dlp/extractor/amcnetworks.py +++ b/yt_dlp/extractor/amcnetworks.py @@ 
-64,8 +64,8 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE site, display_id = self._match_valid_url(url).groups() requestor_id = self._REQUESTOR_ID_MAP[site] page_data = self._download_json( - 'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' - % (requestor_id.lower(), display_id), display_id)['data'] + f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}', + display_id)['data'] properties = page_data.get('properties') or {} query = { 'mbr': 'true', @@ -76,15 +76,15 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE try: for v in page_data['children']: if v.get('type') == 'video-player': - releasePid = v['properties']['currentVideo']['meta']['releasePid'] - tp_path = 'M_UwQC/' + releasePid + release_pid = v['properties']['currentVideo']['meta']['releasePid'] + tp_path = 'M_UwQC/' + release_pid media_url = 'https://link.theplatform.com/s/' + tp_path video_player_count += 1 except KeyError: pass if video_player_count > 1: self.report_warning( - 'The JSON data has %d video players. Only one will be extracted' % video_player_count) + f'The JSON data has {video_player_count} video players. Only one will be extracted') # Fall back to videoPid if releasePid not found. # TODO: Fall back to videoPid if releasePid manifest uses DRM. 
@@ -131,7 +131,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE }) ns_keys = theplatform_metadata.get('$xmlns', {}).keys() if ns_keys: - ns = list(ns_keys)[0] + ns = next(iter(ns_keys)) episode = theplatform_metadata.get(ns + '$episodeTitle') or None episode_number = int_or_none( theplatform_metadata.get(ns + '$episode')) diff --git a/yt_dlp/extractor/americastestkitchen.py b/yt_dlp/extractor/americastestkitchen.py index e889458..a6337e4 100644 --- a/yt_dlp/extractor/americastestkitchen.py +++ b/yt_dlp/extractor/americastestkitchen.py @@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor): resource_type = 'episodes' resource = self._download_json( - 'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id) + f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id) video = resource['video'] if is_episode else resource episode = resource if is_episode else resource.get('episode') or {} return { '_type': 'url_transparent', - 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'], + 'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']), 'ie_key': 'Zype', 'description': clean_html(video.get('description')), 'timestamp': unified_timestamp(video.get('publishDate')), @@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): ] if season_number: - playlist_id = 'season_%d' % season_number - playlist_title = 'Season %d' % season_number + playlist_id = f'season_{season_number}' + playlist_title = f'Season {season_number}' facet_filters.append('search_season_list:' + playlist_title) else: playlist_id = show playlist_title = title season_search = self._download_json( - 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug, + 
f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production', playlist_id, headers={ 'Origin': 'https://www.americastestkitchen.com', 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805', 'X-Algolia-Application-Id': 'Y1FNZXUI30', }, query={ 'facetFilters': json.dumps(facet_filters), - 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug, + 'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season', 'attributesToHighlight': '', 'hitsPerPage': 1000, }) @@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): 'description': episode.get('description'), 'timestamp': unified_timestamp(episode.get('search_document_date')), 'season_number': season_number, - 'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)), + 'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')), 'ie_key': AmericasTestKitchenIE.ie_key(), } diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 6b2bf2d..adf4733 100644 --- a/yt_dlp/extractor/amp.py +++ b/yt_dlp/extractor/amp.py @@ -19,12 +19,12 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with 'Unable to download Akamai AMP feed', transform_source=strip_jsonp) item = feed.get('channel', {}).get('item') if not item: - raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error'])) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error'])) video_id = item['guid'] def get_media_node(name, default=None): - media_name = 'media-%s' % name + media_name = f'media-{name}' media_group = item.get('media-group') or item return media_group.get(media_name) or item.get(media_name) or item.get(name, default) diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py index 5e78f37..652154a 100644 --- a/yt_dlp/extractor/anchorfm.py +++ 
b/yt_dlp/extractor/anchorfm.py @@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'release_date': '20230121', 'release_timestamp': 1674285179, 'episode_id': 'e1tpt3d', - } + }, }, { # embed url 'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd', @@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'season': 'Season 2', 'season_number': 2, 'episode_id': 'e1shjqd', - } + }, }] _WEBPAGE_TESTS = [{ @@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg', 'uploader': 'Podcast Tempo', 'channel': 'apakatatempo', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/angel.py b/yt_dlp/extractor/angel.py index 9f5b9b5..6800fe3 100644 --- a/yt_dlp/extractor/angel.py +++ b/yt_dlp/extractor/angel.py @@ -15,8 +15,8 @@ class AngelIE(InfoExtractor): 'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons', 'description': 'md5:73b704897c20ab59c433a9c0a8202d5e', 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', - 'duration': 1359.0 - } + 'duration': 1359.0, + }, }, { 'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name', 'md5': 'e4774bad0a5f0ad2e90d175cafdb797d', @@ -26,8 +26,8 @@ class AngelIE(InfoExtractor): 'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name', 'description': 'md5:aadfb4827a94415de5ff6426e6dee3be', 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', - 'duration': 3276.0 - } + 'duration': 3276.0, + }, }] def _real_extract(self, url): @@ -44,7 +44,7 @@ class AngelIE(InfoExtractor): 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'formats': formats, - 'subtitles': subtitles + 
'subtitles': subtitles, } # Angel uses cloudinary in the background and supports image transformations. diff --git a/yt_dlp/extractor/antenna.py b/yt_dlp/extractor/antenna.py index 2929d65..b1a0179 100644 --- a/yt_dlp/extractor/antenna.py +++ b/yt_dlp/extractor/antenna.py @@ -105,7 +105,7 @@ class Ant1NewsGrArticleIE(AntennaBaseIE): info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle') embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage)) if not embed_urls: - raise ExtractorError('no videos found for %s' % video_id, expected=True) + raise ExtractorError(f'no videos found for {video_id}', expected=True) return self.playlist_from_matches( embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(), video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')}) diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index 0df5033..bf3d60b 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -238,7 +238,7 @@ class AnvatoIE(InfoExtractor): 'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900', 'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99', 'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe', - 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582' + 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582', } def _generate_nfl_token(self, anvack, mcp_id): @@ -255,7 +255,7 @@ class AnvatoIE(InfoExtractor): token } } -}''' % (anvack, mcp_id), +}''' % (anvack, mcp_id), # noqa: UP031 }).encode(), headers={ 'Authorization': auth_token, 'Content-Type': 'application/json', @@ -299,7 +299,7 @@ class AnvatoIE(InfoExtractor): return self._download_json( video_data_url, video_id, transform_source=strip_jsonp, query=query, - data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8')) + data=json.dumps({'api': api}, separators=(',', 
':')).encode()) def _get_anvato_videos(self, access_key, video_id, token): video_data = self._get_video_json(access_key, video_id, token) @@ -358,7 +358,7 @@ class AnvatoIE(InfoExtractor): for caption in video_data.get('captions', []): a_caption = { 'url': caption['url'], - 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None + 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None, } subtitles.setdefault(caption['language'], []).append(a_caption) subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs) diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py index 455f667..893dce7 100644 --- a/yt_dlp/extractor/aol.py +++ b/yt_dlp/extractor/aol.py @@ -30,7 +30,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # video with vidible ID 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', @@ -46,7 +46,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', 'only_matching': True, @@ -83,10 +83,10 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE return self._extract_yahoo_video(video_id, 'us') response = self._download_json( - 'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id, + f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details', video_id)['response'] if response['statusText'] != 'Ok': - raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True) video_data = response['data'] formats = [] diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py index 1ea0b1d..fed5970 100644 --- a/yt_dlp/extractor/apa.py +++ b/yt_dlp/extractor/apa.py @@ -34,7 +34,7 @@ class 
APAIE(InfoExtractor): video_id, base_url = mobj.group('id', 'base_url') webpage = self._download_webpage( - '%s/player/%s' % (base_url, video_id), video_id) + f'{base_url}/player/{video_id}', video_id) jwplatform_id = self._search_regex( r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage, @@ -47,7 +47,7 @@ class APAIE(InfoExtractor): def extract(field, name=None): return self._search_regex( - r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field, + rf'\b{field}["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, name or field, default=None, group='value') title = extract('title') or video_id diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py index 49bbeab..bd301e9 100644 --- a/yt_dlp/extractor/applepodcasts.py +++ b/yt_dlp/extractor/applepodcasts.py @@ -24,7 +24,7 @@ class ApplePodcastsIE(InfoExtractor): 'duration': 6454, 'series': 'The Tim Dillon Show', 'thumbnail': 're:.+[.](png|jpe?g|webp)', - } + }, }, { 'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', 'only_matching': True, diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py index 21103ae..0a600f6 100644 --- a/yt_dlp/extractor/appletrailers.py +++ b/yt_dlp/extractor/appletrailers.py @@ -1,8 +1,8 @@ import json import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( int_or_none, parse_duration, @@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor): 'uploader_id': 'wb', }, }, - ] + ], }, { 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', 'info_dict': { @@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor): webpage = self._download_webpage(url, movie) film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') film_data = self._download_json( - 'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id, + f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json', film_id, 
fatal=False) if film_data: @@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor): if not src: continue formats.append({ - 'format_id': '%s-%s' % (version, size), + 'format_id': f'{version}-{size}', 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), 'width': int_or_none(size_data.get('width')), 'height': int_or_none(size_data.get('height')), @@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor): page_data = film_data.get('page', {}) return self.playlist_result(entries, film_id, page_data.get('movie_title')) - playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') + playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc') def fix_html(s): s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s) @@ -143,10 +143,9 @@ class AppleTrailersIE(InfoExtractor): # like: http://trailers.apple.com/trailers/wb/gravity/ def _clean_json(m): - return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') + return 'iTunes.playURL({});'.format(m.group(1).replace('\'', ''')) s = re.sub(self._JSON_RE, _clean_json, s) - s = '<html>%s</html>' % s - return s + return f'<html>{s}</html>' doc = self._download_xml(playlist_url, movie, transform_source=fix_html) playlist = [] @@ -170,18 +169,18 @@ class AppleTrailersIE(InfoExtractor): duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() - settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) + settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json') settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') formats = [] - for format in settings['metadata']['sizes']: + for fmt in settings['metadata']['sizes']: # The src is a file pointing to the real video file - format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) + format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src']) formats.append({ 'url': format_url, - 'format': 
format['type'], - 'width': int_or_none(format['width']), - 'height': int_or_none(format['height']), + 'format': fmt['type'], + 'width': int_or_none(fmt['width']), + 'height': int_or_none(fmt['height']), }) playlist.append({ @@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor): 'title': 'Movie Studios', }, } - _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS) + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>{})'.format('|'.join(_SECTIONS)) _TESTS = [{ 'url': 'http://trailers.apple.com/#section=justadded', 'info_dict': { @@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor): def _real_extract(self, url): section = self._match_id(url) section_data = self._download_json( - 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'], + 'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']), section) entries = [ self.url_result('http://trailers.apple.com' + e['location']) diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 41f3a4f..f5a55ef 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -1,10 +1,11 @@ +from __future__ import annotations + import json import re import urllib.parse from .common import InfoExtractor from .youtube import YoutubeBaseInfoExtractor, YoutubeIE -from ..compat import compat_urllib_parse_unquote from ..networking import HEADRequest from ..networking.exceptions import HTTPError from ..utils import ( @@ -145,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'Bells Of Rostov', 'ext': 'mp3', }, - 'skip': 'restricted' + 'skip': 'restricted', }, { 'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3', 'md5': '1d0aabe03edca83ca58d9ed3b493a3c3', @@ -158,7 +159,7 @@ 
class ArchiveOrgIE(InfoExtractor): 'description': 'md5:012b2d668ae753be36896f343d12a236', 'upload_date': '20190928', }, - 'skip': 'restricted' + 'skip': 'restricted', }, { # Original formats are private 'url': 'https://archive.org/details/irelandthemakingofarepublic', @@ -202,8 +203,8 @@ class ArchiveOrgIE(InfoExtractor): 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', 'display_id': 'irelandthemakingofarepublicreel2.mov', }, - } - ] + }, + ], }] @staticmethod @@ -220,7 +221,7 @@ class ArchiveOrgIE(InfoExtractor): def _real_extract(self, url): video_id = urllib.parse.unquote_plus(self._match_id(url)) - identifier, entry_id = (video_id.split('/', 1) + [None])[:2] + identifier, _, entry_id = video_id.partition('/') # Archive.org metadata API doesn't clearly demarcate playlist entries # or subtitle tracks, so we get them from the embeddable player. @@ -246,7 +247,7 @@ class ArchiveOrgIE(InfoExtractor): if track['kind'] != 'subtitles': continue entries[p['orig']][track['label']] = { - 'url': 'https://archive.org/' + track['file'].lstrip('/') + 'url': 'https://archive.org/' + track['file'].lstrip('/'), } metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier) @@ -293,7 +294,9 @@ class ArchiveOrgIE(InfoExtractor): 'height': int_or_none(f.get('width')), 'filesize': int_or_none(f.get('size'))}) - extension = (f['name'].rsplit('.', 1) + [None])[1] + _, has_ext, extension = f['name'].rpartition('.') + if not has_ext: + extension = None # We don't want to skip private formats if the user has access to them, # however without access to an account with such privileges we can't implement/test this. 
@@ -308,7 +311,7 @@ class ArchiveOrgIE(InfoExtractor): 'filesize': int_or_none(f.get('size')), 'protocol': 'https', 'source_preference': 0 if f.get('source') == 'original' else -1, - 'format_note': f.get('source') + 'format_note': f.get('source'), }) for entry in entries.values(): @@ -371,7 +374,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/Zeurel', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg', - } + }, }, { # Internal link 'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0', @@ -388,7 +391,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/1veritasium', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA', - } + }, }, { # Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description. # Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description @@ -403,8 +406,8 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_id': 'machinima', 'uploader_url': 'https://www.youtube.com/user/machinima', 'thumbnail': r're:https?://.*\.(jpg|webp)', - 'uploader': 'machinima' - } + 'uploader': 'machinima', + }, }, { # FLV video. 
Video file URL does not provide itag information 'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw', @@ -421,7 +424,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'jawed', - } + }, }, { 'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA', 'info_dict': { @@ -437,7 +440,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/itsmadeon', 'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w', 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # First capture is of dead video, second is the oldest from CDX response. 'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E', @@ -454,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'ETC News', - } + }, }, { # First capture of dead video, capture date in link links to dead capture. 'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E', @@ -473,15 +476,15 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader': 'ETC News', }, 'expected_warnings': [ - r'unable to download capture webpage \(it may not be archived\)' - ] + r'unable to download capture webpage \(it may not be archived\)', + ], }, { # Very old YouTube page, has - YouTube in title. 'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg', 'info_dict': { 'id': '-06-KB9XTzg', 'ext': 'flv', - 'title': 'New Coin Hack!! 100% Safe!!' - } + 'title': 'New Coin Hack!! 
100% Safe!!', + }, }, { 'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8', 'info_dict': { @@ -495,7 +498,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'DankPods', - } + }, }, { # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093 'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4', @@ -512,7 +515,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_id': 'PewDiePie', 'uploader_url': 'https://www.youtube.com/user/PewDiePie', 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # ~June 2010 Capture. swfconfig 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y', @@ -527,7 +530,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks', 'upload_date': '20090520', - } + }, }, { # Jan 2011: watch-video-date/eow-date surrounded by whitespace 'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc', @@ -542,7 +545,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'duration': 132, 'uploader_url': 'https://www.youtube.com/user/claybutlermusic', - } + }, }, { # ~May 2009 swfArgs. ytcfg is spread out over various vars 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY', @@ -557,7 +560,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'description': 'md5:4ca77d79538064e41e4cc464e93f44f0', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'duration': 754, - } + }, }, { # ~June 2012. Upload date is in another lang so cannot extract. 
'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA', @@ -571,7 +574,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader': 'BlackNerdComedy', 'duration': 182, 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # ~July 2013 'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM', @@ -587,7 +590,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ', 'upload_date': '20060428', 'uploader': 'punkybird', - } + }, }, { # April 2020: Player response in player config 'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en', @@ -604,7 +607,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'description': 'md5:c625bb3c02c4f5fb4205971e468fa341', 'uploader_url': 'https://www.youtube.com/user/GameGrumps', - } + }, }, { # watch7-user-header with yt-user-info 'url': 'ytarchive:kbh4T_b4Ixw:20160307085057', @@ -619,7 +622,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'upload_date': '20150503', 'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA', - } + }, }, { # April 2012 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU', @@ -634,35 +637,35 @@ class YoutubeWebArchiveIE(InfoExtractor): 'duration': 200, 'upload_date': '20120407', 'uploader_id': 'thecomputernerd01', - } + }, }, { 'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M', - 'only_matching': True + 'only_matching': True, }, { # Video not archived, only capture is unavailable video page 'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10', - 'only_matching': True + 'only_matching': True, }, { # 
Encoded url 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&search=soccer', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg', - 'only_matching': True + 'only_matching': True, }, { 'url': 'ytarchive:BaW_jenozKc:20050214000000', - 'only_matching': True + 'only_matching': True, }, { 'url': 'ytarchive:BaW_jenozKc', - 'only_matching': True + 'only_matching': True, }, ] _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE @@ -673,13 +676,13 @@ class YoutubeWebArchiveIE(InfoExtractor): _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers _YT_ALL_THUMB_SERVERS = orderedSet( - _YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]]) + [*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]]) _WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/' _OLDEST_CAPTURE_DATE = 20050214000000 _NEWEST_CAPTURE_DATE = 20500101000000 - def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False): + def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False): # CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md query = { 'url': url, @@ -688,14 +691,14 @@ class YoutubeWebArchiveIE(InfoExtractor): 'limit': 500, 'filter': 
['statuscode:200'] + (filters or []), 'collapse': collapse or [], - **(query or {}) + **(query or {}), } res = self._download_json( 'https://web.archive.org/cdx/search/cdx', item_id, note or 'Downloading CDX API JSON', query=query, fatal=fatal) if isinstance(res, list) and len(res) >= 2: # format response to make it easier to use - return list(dict(zip(res[0], v)) for v in res[1:]) + return [dict(zip(res[0], v)) for v in res[1:]] elif not isinstance(res, list) or len(res) != 0: self.report_warning('Error while parsing CDX API response' + bug_reports_message()) @@ -852,7 +855,7 @@ class YoutubeWebArchiveIE(InfoExtractor): { 'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'), 'filesize': int_or_none(thumbnail_dict.get('length')), - 'preference': int_or_none(thumbnail_dict.get('length')) + 'preference': int_or_none(thumbnail_dict.get('length')), } for thumbnail_dict in response) if not try_all: break @@ -893,7 +896,7 @@ class YoutubeWebArchiveIE(InfoExtractor): for retry in retry_manager: try: urlh = self._request_webpage( - HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id), + HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'), video_id, note='Fetching archived video file url', expected_status=True) except ExtractorError as e: # HTTP Error 404 is expected if the video is not saved. 
@@ -924,21 +927,21 @@ class YoutubeWebArchiveIE(InfoExtractor): info['thumbnails'] = self._extract_thumbnails(video_id) if urlh: - url = compat_urllib_parse_unquote(urlh.url) + url = urllib.parse.unquote(urlh.url) video_file_url_qs = parse_qs(url) # Attempt to recover any ext & format info from playback url & response headers - format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} + fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} itag = try_get(video_file_url_qs, lambda x: x['itag'][0]) if itag and itag in YoutubeIE._formats: - format.update(YoutubeIE._formats[itag]) - format.update({'format_id': itag}) + fmt.update(YoutubeIE._formats[itag]) + fmt.update({'format_id': itag}) else: mime = try_get(video_file_url_qs, lambda x: x['mime'][0]) ext = (mimetype2ext(mime) or urlhandle_detect_ext(urlh) or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type'))) - format.update({'ext': ext}) - info['formats'] = [format] + fmt.update({'ext': ext}) + info['formats'] = [fmt] if not info.get('duration'): info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0])) diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index febd3d2..8da9bc4 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( extract_attributes, int_or_none, + join_nonempty, parse_iso8601, try_get, ) @@ -11,7 +12,7 @@ from ..utils import ( class ArcPublishingIE(InfoExtractor): _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' - _VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX + _VALID_URL = rf'arcpublishing:(?P<org>[a-z]+):(?P<id>{_UUID_REGEX})' _TESTS = [{ # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/ 'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab', @@ 
-74,12 +75,12 @@ class ArcPublishingIE(InfoExtractor): def _extract_embed_urls(cls, url, webpage): entries = [] # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview - for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage): + for powa_el in re.findall(rf'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage): powa = extract_attributes(powa_el) or {} org = powa.get('data-org') uuid = powa.get('data-uuid') if org and uuid: - entries.append('arcpublishing:%s:%s' % (org, uuid)) + entries.append(f'arcpublishing:{org}:{uuid}') return entries def _real_extract(self, url): @@ -122,7 +123,7 @@ class ArcPublishingIE(InfoExtractor): elif stream_type in ('ts', 'hls'): m3u8_formats = self._extract_m3u8_formats( s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False) - if all([f.get('acodec') == 'none' for f in m3u8_formats]): + if all(f.get('acodec') == 'none' for f in m3u8_formats): continue for f in m3u8_formats: height = f.get('height') @@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor): else: vbr = int_or_none(s.get('bitrate')) formats.append({ - 'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type, + 'format_id': join_nonempty(stream_type, vbr), 'vbr': vbr, 'width': int_or_none(s.get('width')), 'height': int_or_none(s.get('height')), diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 3db59c5..6fd6413 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -85,7 +85,7 @@ class ARDMediathekBaseIE(InfoExtractor): formats.extend(self._extract_f4m_formats( update_url_query(stream_url, { 'hdcore': '3.1.1', - 'plugin': 'aasp-3.1.1.69.124' + 'plugin': 'aasp-3.1.1.69.124', }), video_id, f4m_id='hds', fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( @@ -96,12 +96,12 @@ class ARDMediathekBaseIE(InfoExtractor): f = { 'url': server, 'play_path': stream_url, - 'format_id': 
'a%s-rtmp-%s' % (num, quality), + 'format_id': f'a{num}-rtmp-{quality}', } else: f = { 'url': stream_url, - 'format_id': 'a%s-%s-%s' % (num, ext, quality) + 'format_id': f'a{num}-{ext}-{quality}', } m = re.search( r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py index de36ec8..aa6c5ca 100644 --- a/yt_dlp/extractor/arkena.py +++ b/yt_dlp/extractor/arkena.py @@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor): raise ExtractorError('Invalid URL', expected=True) media = self._download_json( - 'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id), + f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}', video_id, query={ # https://video.qbrick.com/docs/api/examples/library-api.html 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags', @@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor): formats.extend(self._extract_f4m_formats( href, video_id, f4m_id='hds', fatal=False)) elif mime_type == 'application/dash+xml': - formats.extend(self._extract_f4m_formats( - href, video_id, f4m_id='hds', fatal=False)) + formats.extend(self._extract_mpd_formats( + href, video_id, mpd_id='dash', fatal=False)) elif mime_type == 'application/vnd.ms-sstr+xml': formats.extend(self._extract_ism_formats( href, video_id, ism_id='mss', fatal=False)) diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index 9a5524a..f196f61 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( float_or_none, format_field, @@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor): 'view_count': int, 'tags': ['linearna_algebra'], 'start_time': 10, - 
} + }, }, { 'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4', 'only_matching': True, @@ -93,6 +91,6 @@ class ArnesIE(InfoExtractor): 'duration': float_or_none(video.get('duration'), 1000), 'view_count': int_or_none(video.get('views')), 'tags': video.get('hashtags'), - 'start_time': int_or_none(compat_parse_qs( - compat_urllib_parse_urlparse(url).query).get('t', [None])[0]), + 'start_time': int_or_none(urllib.parse.parse_qs( + urllib.parse.urlparse(url).query).get('t', [None])[0]), } diff --git a/yt_dlp/extractor/art19.py b/yt_dlp/extractor/art19.py index 271c505..deec7ad 100644 --- a/yt_dlp/extractor/art19.py +++ b/yt_dlp/extractor/art19.py @@ -153,7 +153,7 @@ class Art19IE(InfoExtractor): 'series_id': ('series_id', {str}), 'timestamp': ('created_at', {parse_iso8601}), 'release_timestamp': ('released_at', {parse_iso8601}), - 'modified_timestamp': ('updated_at', {parse_iso8601}) + 'modified_timestamp': ('updated_at', {parse_iso8601}), })), **traverse_obj(rss_metadata, ('content', { 'title': ('episode_title', {str}), diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 46fe006..142d4b0 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -20,15 +20,15 @@ class ArteTVBaseIE(InfoExtractor): class ArteTVIE(ArteTVBaseIE): - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) (?:https?:// (?: - (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos| - api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s) + (?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos| + api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>{ArteTVBaseIE._ARTE_LANGUAGES}) ) |arte://program) - /(?P<id>\d{6}-\d{3}-[AF]|LIVE) - ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES} + /(?P<id>\d{{6}}-\d{{3}}-[AF]|LIVE) + ''' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', 'only_matching': True, @@ -145,7 +145,7 @@ class ArteTVIE(ArteTVBaseIE): language_code = self._LANG_MAP.get(lang) config = 
self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={ - 'x-validated-age': '18' + 'x-validated-age': '18', }) geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {} @@ -247,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor): 'description': 'md5:be40b667f45189632b78c1425c7c2ce1', 'upload_date': '20201116', }, - 'skip': 'No video available' + 'skip': 'No video available', }, { 'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A', 'only_matching': True, @@ -262,7 +262,7 @@ class ArteTVEmbedIE(InfoExtractor): class ArteTVPlaylistIE(ArteTVBaseIE): - _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES + _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>RC-\d{{6}})' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', 'only_matching': True, @@ -298,7 +298,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): class ArteTVCategoryIE(ArteTVBaseIE): - _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES + _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P<lang>{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/politics-and-society/', 'info_dict': { @@ -312,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE): @classmethod def suitable(cls, url): return ( - not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, )) + not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE)) and super().suitable(url)) def _real_extract(self, url): @@ -321,12 +321,12 @@ class ArteTVCategoryIE(ArteTVBaseIE): items = [] for video in re.finditer( - r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang, + 
rf'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)', webpage): video = video.group('url') if video == url: continue - if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )): + if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)): items.append(video) title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py index 3a44e52..0fe95be 100644 --- a/yt_dlp/extractor/atresplayer.py +++ b/yt_dlp/extractor/atresplayer.py @@ -20,7 +20,7 @@ class AtresPlayerIE(InfoExtractor): 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc', 'duration': 3413, }, - 'skip': 'This video is only available for registered users' + 'skip': 'This video is only available for registered users', }, { 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', @@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor): ] _API_BASE = 'https://api.atresplayer.com/' - def _handle_error(self, e, code): - if isinstance(e.cause, HTTPError) and e.cause.status == code: - error = self._parse_json(e.cause.response.read(), None) - if error.get('error') == 'required_registered': - self.raise_login_required() - raise ExtractorError(error['error_description'], expected=True) - raise - def _perform_login(self, username, password): self._request_webpage( self._API_BASE + 'login', None, 'Downloading login page') @@ -49,13 +41,15 @@ class AtresPlayerIE(InfoExtractor): target_url = self._download_json( 'https://account.atresmedia.com/api/login', None, 'Logging in', headers={ - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }, data=urlencode_postdata({ 'username': username, 'password': password, }))['targetUrl'] except ExtractorError as e: - self._handle_error(e, 400) + if 
isinstance(e.cause, HTTPError) and e.cause.status == 400: + raise ExtractorError('Invalid username and/or password', expected=True) + raise self._request_webpage(target_url, None, 'Following Target URL') @@ -66,7 +60,12 @@ class AtresPlayerIE(InfoExtractor): episode = self._download_json( self._API_BASE + 'client/v1/player/episode/' + video_id, video_id) except ExtractorError as e: - self._handle_error(e, 403) + if isinstance(e.cause, HTTPError) and e.cause.status == 403: + error = self._parse_json(e.cause.response.read(), None) + if error.get('error') == 'required_registered': + self.raise_login_required() + raise ExtractorError(error['error_description'], expected=True) + raise title = episode['titulo'] diff --git a/yt_dlp/extractor/atscaleconf.py b/yt_dlp/extractor/atscaleconf.py index 3f7b1e9..b219eee 100644 --- a/yt_dlp/extractor/atscaleconf.py +++ b/yt_dlp/extractor/atscaleconf.py @@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor): 'info_dict': { 'id': 'data-scale-spring-2022', 'title': 'Data @Scale Spring 2022', - 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' + 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55', }, }, { 'url': 'https://atscaleconference.com/events/video-scale-2021/', @@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor): 'info_dict': { 'id': 'video-scale-2021', 'title': 'Video @Scale 2021', - 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' + 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55', }, }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) return self.playlist_from_matches( re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage), - ie='Generic', playlist_id=id, + ie='Generic', playlist_id=playlist_id, title=self._og_search_title(webpage), description=self._og_search_description(webpage)) diff --git 
a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index 20ee34c..37bb616 100644 --- a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py @@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor): 'id': 'v-ce9cgn1e70n5-1', 'ext': 'mp4', 'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen', - } + }, }, { 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1', 'only_matching': True, @@ -66,10 +66,10 @@ class ATVAtIE(InfoExtractor): video_id=video_id) video_title = json_data['views']['default']['page']['title'] - contentResource = json_data['views']['default']['page']['contentResource'] - content_id = contentResource[0]['id'] - content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']} - for id, content in enumerate(contentResource)] + content_resource = json_data['views']['default']['page']['contentResource'] + content_id = content_resource[0]['id'] + content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']} + for id_, content in enumerate(content_resource)] time_of_request = dt.datetime.now() not_before = time_of_request - dt.timedelta(minutes=5) @@ -87,17 +87,17 @@ class ATVAtIE(InfoExtractor): videos = self._download_json( 'https://vas-v4.p7s1video.net/4.0/getsources', content_id, 'Downloading videos JSON', query={ - 'token': jwt_token.decode('utf-8') + 'token': jwt_token.decode('utf-8'), }) - video_id, videos_data = list(videos['data'].items())[0] + video_id, videos_data = next(iter(videos['data'].items())) error_msg = try_get(videos_data, lambda x: x['error']['title']) if error_msg == 'Geo check failed': self.raise_geo_restricted(error_msg) elif error_msg: raise ExtractorError(error_msg) entries = [ - self._extract_video_info(url, contentResource[video['id']], video) + self._extract_video_info(url, content_resource[video['id']], video) for video in videos_data] return { diff --git a/yt_dlp/extractor/audimedia.py 
b/yt_dlp/extractor/audimedia.py index 35114e5..c5a9c7e 100644 --- a/yt_dlp/extractor/audimedia.py +++ b/yt_dlp/extractor/audimedia.py @@ -19,7 +19,7 @@ class AudiMediaIE(InfoExtractor): 'timestamp': 1448354940, 'duration': 74022, 'view_count': int, - } + }, }, { 'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991', 'only_matching': True, @@ -73,7 +73,7 @@ class AudiMediaIE(InfoExtractor): bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None) if bitrate: f.update({ - 'format_id': 'http-%s' % bitrate, + 'format_id': f'http-{bitrate}', }) formats.append(f) diff --git a/yt_dlp/extractor/audioboom.py b/yt_dlp/extractor/audioboom.py index a23fcd2..751b74a 100644 --- a/yt_dlp/extractor/audioboom.py +++ b/yt_dlp/extractor/audioboom.py @@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor): 'duration': 4000.99, 'uploader': 'Sue Perkins: An hour or so with...', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins', - } + }, }, { # Direct mp3-file link 'url': 'https://audioboom.com/posts/8128496.mp3', 'md5': 'e329edf304d450def95c7f86a9165ee1', @@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor): 'duration': 1689.7, 'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904', - } + }, }, { 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', 'only_matching': True, diff --git a/yt_dlp/extractor/audiodraft.py b/yt_dlp/extractor/audiodraft.py index 71e5afd..484ad4e 100644 --- a/yt_dlp/extractor/audiodraft.py +++ b/yt_dlp/extractor/audiodraft.py @@ -9,7 +9,7 @@ class AudiodraftBaseIE(InfoExtractor): headers={ 'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8', 'X-Requested-With': 'XMLHttpRequest', - }, data=f'id={player_entry_id}'.encode('utf-8')) + }, data=f'id={player_entry_id}'.encode()) return { 'id': str(data_json['entry_id']), @@ -65,9 
+65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE): }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) - player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + player_entry_id = self._search_regex( + r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id') return self._audiodraft_extract_from_id(player_entry_id) @@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE): }] def _real_extract(self, url): - id = self._match_id(url) - return self._audiodraft_extract_from_id(f'player_entry_{id}') + video_id = self._match_id(url) + return self._audiodraft_extract_from_id(f'player_entry_{video_id}') diff --git a/yt_dlp/extractor/audiomack.py b/yt_dlp/extractor/audiomack.py index 5c4160f..1d4460c 100644 --- a/yt_dlp/extractor/audiomack.py +++ b/yt_dlp/extractor/audiomack.py @@ -3,7 +3,6 @@ import time from .common import InfoExtractor from .soundcloud import SoundcloudIE -from ..compat import compat_str from ..utils import ( ExtractorError, url_basename, @@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor): 'id': '310086', 'ext': 'mp3', 'uploader': 'Roosh Williams', - 'title': 'Extraordinary' - } + 'title': 'Extraordinary', + }, }, # audiomack wrapper around soundcloud song # Needs new test URL. 
@@ -56,7 +55,7 @@ class AudiomackIE(InfoExtractor): # API is inconsistent with errors if 'url' not in api_response or not api_response['url'] or 'error' in api_response: - raise ExtractorError('Invalid url %s' % url) + raise ExtractorError(f'Invalid url {url}') # Audiomack wraps a lot of soundcloud tracks in their branded wrapper # if so, pass the work off to the soundcloud extractor @@ -64,7 +63,7 @@ class AudiomackIE(InfoExtractor): return self.url_result(api_response['url'], SoundcloudIE.ie_key()) return { - 'id': compat_str(api_response.get('id', album_url_tag)), + 'id': str(api_response.get('id', album_url_tag)), 'uploader': api_response.get('artist'), 'title': api_response.get('title'), 'url': api_response['url'], @@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor): 'info_dict': { 'id': '812251', - 'title': 'Tha Tour: Part 2 (Official Mixtape)' - } + 'title': 'Tha Tour: Part 2 (Official Mixtape)', + }, }, # Album playlist ripped from fakeshoredrive with no metadata { @@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor): 'id': '837576', 'ext': 'mp3', 'uploader': 'Lil Herb a.k.a. G Herbo', - } + }, }, { 'info_dict': { 'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)', 'id': '837580', 'ext': 'mp3', 'uploader': 'Lil Herb a.k.a. 
G Herbo', - } + }, }], - } + }, ] def _real_extract(self, url): @@ -123,12 +122,12 @@ class AudiomackAlbumIE(InfoExtractor): api_response = self._download_json( 'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d' % (album_url_tag, track_no, time.time()), album_url_tag, - note='Querying song information (%d)' % (track_no + 1)) + note=f'Querying song information ({track_no + 1})') # Total failure, only occurs when url is totally wrong # Won't happen in middle of valid playlist (next case) if 'url' not in api_response or 'error' in api_response: - raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url)) + raise ExtractorError(f'Invalid url for track {track_no} of album url {url}') # URL is good but song id doesn't exist - usually means end of playlist elif not api_response['url']: break @@ -136,10 +135,10 @@ class AudiomackAlbumIE(InfoExtractor): # Pull out the album metadata and add to result (if it exists) for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]: if apikey in api_response and resultkey not in result: - result[resultkey] = compat_str(api_response[apikey]) + result[resultkey] = str(api_response[apikey]) song_id = url_basename(api_response['url']).rpartition('.')[0] result['entries'].append({ - 'id': compat_str(api_response.get('id', song_id)), + 'id': str(api_response.get('id', song_id)), 'uploader': api_response.get('artist'), 'title': api_response.get('title', song_id), 'url': api_response['url'], diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py index 6448b44..c611c6e 100644 --- a/yt_dlp/extractor/audius.py +++ b/yt_dlp/extractor/audius.py @@ -1,7 +1,7 @@ import random +import urllib.parse from .common import InfoExtractor -from ..compat import compat_str, compat_urllib_parse_unquote from ..utils import ExtractorError, str_or_none, try_get @@ -15,13 +15,13 @@ class AudiusBaseIE(InfoExtractor): if response_data is not None: return response_data if len(response) == 1 and 
'message' in response: - raise ExtractorError('API error: %s' % response['message'], + raise ExtractorError('API error: {}'.format(response['message']), expected=True) raise ExtractorError('Unexpected API response') def _select_api_base(self): """Selecting one of the currently available API hosts""" - response = super(AudiusBaseIE, self)._download_json( + response = super()._download_json( 'https://api.audius.co/', None, note='Requesting available API hosts', errnote='Unable to request available API hosts') @@ -41,8 +41,8 @@ class AudiusBaseIE(InfoExtractor): anything from this link, since the Audius API won't be able to resolve this url """ - url = compat_urllib_parse_unquote(url) - title = compat_urllib_parse_unquote(title) + url = urllib.parse.unquote(url) + title = urllib.parse.unquote(title) if '/' in title or '%2F' in title: fixed_title = title.replace('/', '%5C').replace('%2F', '%5C') return url.replace(title, fixed_title) @@ -54,19 +54,19 @@ class AudiusBaseIE(InfoExtractor): if self._API_BASE is None: self._select_api_base() try: - response = super(AudiusBaseIE, self)._download_json( - '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note, + response = super()._download_json( + f'{self._API_BASE}{self._API_V}{path}', item_id, note=note, errnote=errnote, expected_status=expected_status) except ExtractorError as exc: # some of Audius API hosts may not work as expected and return HTML - if 'Failed to parse JSON' in compat_str(exc): + if 'Failed to parse JSON' in str(exc): raise ExtractorError('An error occurred while receiving data. 
Try again', expected=True) raise exc return self._get_response_data(response) def _resolve_url(self, url, item_id): - return self._api_request('/resolve?url=%s' % url, item_id, + return self._api_request(f'/resolve?url={url}', item_id, expected_status=404) @@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE): 'view_count': int, 'like_count': int, 'repost_count': int, - } + }, }, { # Regular track @@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE): 'view_count': int, 'like_count': int, 'repost_count': int, - } + }, }, ] _ARTWORK_MAP = { - "150x150": 150, - "480x480": 480, - "1000x1000": 1000 + '150x150': 150, + '480x480': 480, + '1000x1000': 1000, } def _real_extract(self, url): @@ -130,7 +130,7 @@ class AudiusIE(AudiusBaseIE): else: # API link title = None # uploader = None - track_data = self._api_request('/tracks/%s' % track_id, track_id) + track_data = self._api_request(f'/tracks/{track_id}', track_id) if not isinstance(track_data, dict): raise ExtractorError('Unexpected API response') @@ -144,7 +144,7 @@ class AudiusIE(AudiusBaseIE): if isinstance(artworks_data, dict): for quality_key, thumbnail_url in artworks_data.items(): thumbnail = { - "url": thumbnail_url + 'url': thumbnail_url, } quality_code = self._ARTWORK_MAP.get(quality_key) if quality_code is not None: @@ -154,12 +154,12 @@ class AudiusIE(AudiusBaseIE): return { 'id': track_id, 'title': track_data.get('title', title), - 'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id), + 'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream', 'ext': 'mp3', 'description': track_data.get('description'), 'duration': track_data.get('duration'), 'track': track_data.get('title'), - 'artist': try_get(track_data, lambda x: x['user']['name'], compat_str), + 'artist': try_get(track_data, lambda x: x['user']['name'], str), 'genre': track_data.get('genre'), 'thumbnails': thumbnails, 'view_count': track_data.get('play_count'), @@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE 
_TESTS = [ { 'url': 'audius:9RWlo', - 'only_matching': True + 'only_matching': True, }, { 'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo', - 'only_matching': True + 'only_matching': True, }, ] @@ -207,7 +207,7 @@ class AudiusPlaylistIE(AudiusBaseIE): if not track_id: raise ExtractorError('Unable to get track ID from playlist') entries.append(self.url_result( - 'audius:%s' % track_id, + f'audius:{track_id}', ie=AudiusTrackIE.ie_key(), video_id=track_id)) return entries @@ -231,7 +231,7 @@ class AudiusPlaylistIE(AudiusBaseIE): raise ExtractorError('Unable to get playlist ID') playlist_tracks = self._api_request( - '/playlists/%s/tracks' % playlist_id, + f'/playlists/{playlist_id}/tracks', title, note='Downloading playlist tracks metadata', errnote='Unable to download playlist tracks metadata') if not isinstance(playlist_tracks, list): @@ -267,5 +267,5 @@ class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete I profile_audius_id = _profile_data[0]['id'] profile_bio = _profile_data[0].get('bio') - api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id) + api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id) return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio) diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index a8dfb3e..4066a5a 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -1,10 +1,7 @@ import base64 +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) from ..utils import ( format_field, int_or_none, @@ -22,14 +19,14 @@ class AWAANIE(InfoExtractor): show_id, video_id, season_id = self._match_valid_url(url).groups() if video_id and int(video_id) > 0: return self.url_result( - 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') + f'http://awaan.ae/media/{video_id}', 
'AWAANVideo') elif season_id and int(season_id) > 0: return self.url_result(smuggle_url( - 'http://awaan.ae/program/season/%s' % season_id, + f'http://awaan.ae/program/season/{season_id}', {'show_id': show_id}), 'AWAANSeason') else: return self.url_result( - 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') + f'http://awaan.ae/program/{show_id}', 'AWAANSeason') class AWAANBaseIE(InfoExtractor): @@ -75,11 +72,11 @@ class AWAANVideoIE(AWAANBaseIE): video_id = self._match_id(url) video_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}', video_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(video_data, video_id, False) - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({ 'id': video_data['id'], 'user_id': video_data['user_id'], 'signature': video_data['signature'], @@ -117,11 +114,11 @@ class AWAANLiveIE(AWAANBaseIE): channel_id = self._match_id(url) channel_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}', channel_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(channel_data, channel_id, True) - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({ + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' 
+ urllib.parse.urlencode({ 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), 'signature': channel_data['signature'], @@ -159,7 +156,7 @@ class AWAANSeasonIE(InfoExtractor): show_id = smuggled_data.get('show_id') if show_id is None: season = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}', season_id, headers={'Origin': 'http://awaan.ae'}) show_id = season['id'] data['show_id'] = show_id @@ -167,7 +164,7 @@ class AWAANSeasonIE(InfoExtractor): 'http://admin.mangomolo.com/analytics/index.php/plus/show', show_id, data=urlencode_postdata(data), headers={ 'Origin': 'http://awaan.ae', - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }) if not season_id: season_id = show['default_season'] @@ -177,8 +174,8 @@ class AWAANSeasonIE(InfoExtractor): entries = [] for video in show['videos']: - video_id = compat_str(video['id']) + video_id = str(video['id']) entries.append(self.url_result( - 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) + f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id)) return self.playlist_result(entries, season_id, title) diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py index 4ebef92..177c410 100644 --- a/yt_dlp/extractor/aws.py +++ b/yt_dlp/extractor/aws.py @@ -1,9 +1,9 @@ import datetime as dt import hashlib import hmac +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor @@ -18,20 +18,20 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with 'Accept': 'application/json', 'Host': self._AWS_PROXY_HOST, 'X-Amz-Date': amz_date, - 'X-Api-Key': 
self._AWS_API_KEY + 'X-Api-Key': self._AWS_API_KEY, } session_token = aws_dict.get('session_token') if session_token: headers['X-Amz-Security-Token'] = session_token def aws_hash(s): - return hashlib.sha256(s.encode('utf-8')).hexdigest() + return hashlib.sha256(s.encode()).hexdigest() # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html - canonical_querystring = compat_urllib_parse_urlencode(query) + canonical_querystring = urllib.parse.urlencode(query) canonical_headers = '' for header_name, header_value in sorted(headers.items()): - canonical_headers += '%s:%s\n' % (header_name.lower(), header_value) + canonical_headers += f'{header_name.lower()}:{header_value}\n' signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())]) canonical_request = '\n'.join([ 'GET', @@ -39,7 +39,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with canonical_querystring, canonical_headers, signed_headers, - aws_hash('') + aws_hash(''), ]) # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html @@ -49,7 +49,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html def aws_hmac(key, msg): - return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) + return hmac.new(key, msg.encode(), hashlib.sha256) def aws_hmac_digest(key, msg): return aws_hmac(key, msg).digest() @@ -57,7 +57,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with def aws_hmac_hexdigest(key, msg): return aws_hmac(key, msg).hexdigest() - k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8') + k_signing = ('AWS4' + aws_dict['secret_key']).encode() for value in credential_scope_list: k_signing = aws_hmac_digest(k_signing, value) @@ -65,11 +65,11 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with # Task 4: 
http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html headers['Authorization'] = ', '.join([ - '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), - 'SignedHeaders=%s' % signed_headers, - 'Signature=%s' % signature, + '{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), + f'SignedHeaders={signed_headers}', + f'Signature={signature}', ]) return self._download_json( - 'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), + 'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), video_id, headers=headers) diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py index d1686ee..0e3a03f 100644 --- a/yt_dlp/extractor/azmedien.py +++ b/yt_dlp/extractor/azmedien.py @@ -38,14 +38,14 @@ class AZMedienIE(InfoExtractor): 'timestamp': 1538328802, 'view_count': int, 'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031', - 'duration': 1930 + 'duration': 1930, }, 'params': { 'skip_download': True, }, }, { 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1', - 'only_matching': True + 'only_matching': True, }] _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be' _PARTNER_ID = '1719221' @@ -62,5 +62,5 @@ class AZMedienIE(InfoExtractor): })['data']['context']['mainAsset']['video']['kaltura']['kalturaId'] return self.url_result( - 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id), + f'kaltura:{self._PARTNER_ID}:{entry_id}', ie=KalturaIE.ie_key(), video_id=entry_id) diff --git a/yt_dlp/extractor/baidu.py b/yt_dlp/extractor/baidu.py index 8786d67..a1ad424 100644 --- a/yt_dlp/extractor/baidu.py +++ b/yt_dlp/extractor/baidu.py @@ -24,8 +24,9 @@ class BaiduVideoIE(InfoExtractor): 
}] def _call_api(self, path, category, playlist_id, note): - return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % ( - path, category, playlist_id), playlist_id, note) + return self._download_json( + f'http://app.video.baidu.com/{path}/?worktype=adnative{category}&id={playlist_id}', + playlist_id, note) def _real_extract(self, url): category, playlist_id = self._match_valid_url(url).groups() @@ -44,7 +45,7 @@ class BaiduVideoIE(InfoExtractor): 'xqsingle', category, playlist_id, 'Download episodes JSON metadata') entries = [self.url_result( - episode['url'], video_title=episode['title'] + episode['url'], video_title=episode['title'], ) for episode in episodes_detail['videos']] return self.playlist_result( diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index c4e07a7..d10bdf8 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -1,10 +1,7 @@ import math +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( InAdvancePagedList, format_field, @@ -20,8 +17,8 @@ class BanByeBaseIE(InfoExtractor): @staticmethod def _extract_playlist_id(url, param='playlist'): - return compat_parse_qs( - compat_urllib_parse_urlparse(url).query).get(param, [None])[0] + return urllib.parse.parse_qs( + urllib.parse.urlparse(url).query).get(param, [None])[0] def _extract_playlist(self, playlist_id): data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index e89b3a6..61cbab5 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -3,7 +3,6 @@ import re import time from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, @@ -42,7 +41,7 @@ class BandcampIE(InfoExtractor): 'uploader_id': 'youtube-dl', 'thumbnail': 
'https://f4.bcbits.com/img/a3216802731_5.jpg', }, - '_skip': 'There is a limit of 200 free downloads / month for the test song' + 'skip': 'There is a limit of 200 free downloads / month for the test song', }, { # free download 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', @@ -119,7 +118,7 @@ class BandcampIE(InfoExtractor): def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True): return self._parse_json(self._html_search_regex( - r'data-%s=(["\'])({.+?})\1' % attr, webpage, + rf'data-{attr}=(["\'])({{.+?}})\1', webpage, attr + ' data', group=2), video_id, fatal=fatal) def _real_extract(self, url): @@ -167,7 +166,7 @@ class BandcampIE(InfoExtractor): download_link = tralbum.get('freeDownloadPage') if download_link: - track_id = compat_str(tralbum['id']) + track_id = str(tralbum['id']) download_webpage = self._download_webpage( download_link, track_id, 'Downloading free downloads page') @@ -192,7 +191,7 @@ class BandcampIE(InfoExtractor): if isinstance(download_formats_list, list): for f in blob['download_formats']: name, ext = f.get('name'), f.get('file_extension') - if all(isinstance(x, compat_str) for x in (name, ext)): + if all(isinstance(x, str) for x in (name, ext)): download_formats[name] = ext.strip('.') for format_id, f in downloads.items(): @@ -207,7 +206,7 @@ class BandcampIE(InfoExtractor): }) format_id = f.get('encoding_name') or format_id stat = self._download_json( - stat_url, track_id, 'Downloading %s JSON' % format_id, + stat_url, track_id, f'Downloading {format_id} JSON', transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], fatal=False) if not stat: @@ -225,7 +224,7 @@ class BandcampIE(InfoExtractor): 'acodec': format_id.split('-')[0], }) - title = '%s - %s' % (artist, track) if artist else track + title = f'{artist} - {track}' if artist else track if not duration: duration = float_or_none(self._html_search_meta( @@ -267,7 +266,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 
'timestamp': 1311756226, 'upload_date': '20110727', 'uploader': 'Blazo', - } + }, }, { 'md5': '1a2c32e2691474643e912cc6cd4bffaa', @@ -278,7 +277,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'timestamp': 1311757238, 'upload_date': '20110727', 'uploader': 'Blazo', - } + }, }, ], 'info_dict': { @@ -287,9 +286,9 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'blazo', }, 'params': { - 'playlistend': 2 + 'playlistend': 2, }, - 'skip': 'Bandcamp imposes download limits.' + 'skip': 'Bandcamp imposes download limits.', }, { 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', 'info_dict': { @@ -324,7 +323,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE def suitable(cls, url): return (False if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url) - else super(BandcampAlbumIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): uploader_id, album_id = self._match_valid_url(url).groups() @@ -376,7 +375,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE }, }, { 'url': 'https://bandcamp.com/?blah/blah@&show=228', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -407,7 +406,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE title = show.get('audio_title') or 'Bandcamp Weekly' subtitle = show.get('subtitle') if subtitle: - title += ' - %s' % subtitle + title += f' - {subtitle}' return { 'id': show_id, @@ -419,7 +418,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE 'series': 'Bandcamp Weekly', 'episode': show.get('subtitle'), 'episode_id': show_id, - 'formats': formats + 'formats': formats, } @@ -440,7 +439,7 @@ class BandcampUserIE(InfoExtractor): 'url': 'http://dotscale.bandcamp.com', 'info_dict': { 'id': 'dotscale', - 'title': 'Discography of dotscale' + 'title': 'Discography of 
dotscale', }, 'playlist_count': 1, }, { diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 82dc9ab..46f2978 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -23,7 +23,7 @@ class BannedVideoIE(InfoExtractor): 'description': 'md5:560d96f02abbebe6c6b78b47465f6b28', 'upload_date': '20200324', 'timestamp': 1585087895, - } + }, }] _GRAPHQL_GETMETADATA_QUERY = ''' @@ -84,15 +84,15 @@ query GetCommentReplies($id: String!) { 'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY, } - def _call_api(self, video_id, id, operation, note): + def _call_api(self, video_id, id_var, operation, note): return self._download_json( 'https://api.infowarsmedia.com/graphql', video_id, note=note, headers={ - 'Content-Type': 'application/json; charset=utf-8' + 'Content-Type': 'application/json; charset=utf-8', }, data=json.dumps({ - 'variables': {'id': id}, + 'variables': {'id': id_var}, 'operationName': operation, - 'query': self._GRAPHQL_QUERIES[operation] + 'query': self._GRAPHQL_QUERIES[operation], }).encode('utf8')).get('data') def _get_comments(self, video_id, comments, comment_data): @@ -151,5 +151,5 @@ query GetCommentReplies($id: String!) 
{ 'tags': [tag.get('name') for tag in video_info.get('tags')], 'availability': self._availability(is_unlisted=video_info.get('unlisted')), 'comments': comments, - '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')) + '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')), } diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index f6b58b3..3af923f 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -2,10 +2,10 @@ import functools import itertools import json import re +import urllib.parse import xml.etree.ElementTree from .common import InfoExtractor -from ..compat import compat_str, compat_urlparse from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -35,7 +35,7 @@ class BBCCoUkIE(InfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})' - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) https?:// (?:www\.)?bbc\.co\.uk/ (?: @@ -45,8 +45,8 @@ class BBCCoUkIE(InfoExtractor): radio/player/| events/[^/]+/play/[^/]+/ ) - (?P<id>%s)(?!/(?:episodes|broadcasts|clips)) - ''' % _ID_REGEX + (?P<id>{_ID_REGEX})(?!/(?:episodes|broadcasts|clips)) + ''' _EMBED_REGEX = [r'setPlaylist\("(?P<url>https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)'] _LOGIN_URL = 'https://account.bbc.com/signin' @@ -75,7 +75,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/', @@ -148,7 +148,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz', 'note': 'Video', @@ -162,7 +162,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 
'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls', 'info_dict': { @@ -268,19 +268,19 @@ class BBCCoUkIE(InfoExtractor): error = clean_html(get_element_by_class('form-message', response)) if error: raise ExtractorError( - 'Unable to login: %s' % error, expected=True) + f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') class MediaSelectionError(Exception): - def __init__(self, id): - self.id = id + def __init__(self, error_id): + self.id = error_id def _extract_asx_playlist(self, connection, programme_id): asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist') return [ref.get('href') for ref in asx.findall('./Entry/ref')] def _extract_items(self, playlist): - return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS) + return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item') def _extract_medias(self, media_selection): error = media_selection.get('result') @@ -312,7 +312,7 @@ class BBCCoUkIE(InfoExtractor): def _raise_extractor_error(self, media_selection_error): raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, media_selection_error.id), + f'{self.IE_NAME} returned error: {media_selection_error.id}', expected=True) def _download_media_selector(self, programme_id): @@ -372,7 +372,7 @@ class BBCCoUkIE(InfoExtractor): for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): formats.append({ 'url': ref, - 'format_id': 'ref%s_%s' % (i, format_id), + 'format_id': f'ref{i}_{format_id}', }) elif transfer_format == 'dash': formats.extend(self._extract_mpd_formats( @@ -394,7 +394,7 @@ class BBCCoUkIE(InfoExtractor): href, programme_id, f4m_id=format_id, fatal=False)) else: if not supplier and bitrate: - format_id += '-%d' % bitrate + format_id += f'-{bitrate}' fmt = { 'format_id': format_id, 'filesize': file_size, @@ -423,9 +423,9 @@ class BBCCoUkIE(InfoExtractor): identifier = connection.get('identifier') server = 
connection.get('server') fmt.update({ - 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), + 'url': f'{protocol}://{server}/{application}?{auth_string}', 'play_path': identifier, - 'app': '%s?%s' % (application, auth_string), + 'app': f'{application}?{auth_string}', 'page_url': 'http://www.bbc.co.uk', 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', 'rtmp_live': False, @@ -441,7 +441,7 @@ class BBCCoUkIE(InfoExtractor): def _download_playlist(self, playlist_id): try: playlist = self._download_json( - 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id, + f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json', playlist_id, 'Downloading playlist JSON') formats = [] subtitles = {} @@ -480,32 +480,32 @@ class BBCCoUkIE(InfoExtractor): def _process_legacy_playlist(self, playlist_id): return self._process_legacy_playlist_url( - 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id) + f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}', playlist_id) def _download_legacy_playlist_url(self, url, playlist_id=None): return self._download_xml( url, playlist_id, 'Downloading legacy playlist XML') def _extract_from_legacy_playlist(self, playlist, playlist_id): - no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS) + no_items = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}noItems') if no_items is not None: reason = no_items.get('reason') if reason == 'preAvailability': - msg = 'Episode %s is not yet available' % playlist_id + msg = f'Episode {playlist_id} is not yet available' elif reason == 'postAvailability': - msg = 'Episode %s is no longer available' % playlist_id + msg = f'Episode {playlist_id} is no longer available' elif reason == 'noMedia': - msg = 'Episode %s is not currently available' % playlist_id + msg = f'Episode {playlist_id} is not currently available' else: - msg = 'Episode %s is not available: %s' % (playlist_id, reason) + msg = 
f'Episode {playlist_id} is not available: {reason}' raise ExtractorError(msg, expected=True) for item in self._extract_items(playlist): kind = item.get('kind') if kind not in ('programme', 'radioProgramme'): continue - title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text - description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS) + title = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}title').text + description_el = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}summary') description = description_el.text if description_el is not None else None def get_programme_id(item): @@ -515,7 +515,7 @@ class BBCCoUkIE(InfoExtractor): if value and re.match(r'^[pb][\da-z]{7}$', value): return value get_from_attributes(item) - mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS) + mediator = item.find(f'./{{{self._EMP_PLAYLIST_NS}}}mediator') if mediator is not None: return get_from_attributes(mediator) @@ -555,7 +555,7 @@ class BBCCoUkIE(InfoExtractor): if not programme_id: programme_id = self._search_regex( - r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None) + rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', fatal=False, default=None) if programme_id: formats, subtitles = self._download_media_selector(programme_id) @@ -641,7 +641,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE }, 'params': { 'skip_download': True, - } + }, }, { # article with single video embedded with data-playable containing XML playlist # with direct video links as progressiveDownloadUrl (for now these are extracted) @@ -884,7 +884,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'bbc_world_service', 'series': 'CrowdScience', 'chapters': [], - } + }, }, { # onion routes 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576', 'only_matching': True, @@ -897,7 +897,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from 
concrete IE def suitable(cls, url): EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE) return (False if any(ie.suitable(url) for ie in EXCLUDE_IE) - else super(BBCIE, cls).suitable(url)) + else super().suitable(url)) def _extract_from_media_meta(self, media_meta, video_id): # Direct links to media in media metadata (e.g. @@ -1009,7 +1009,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if playlist: entry = None for key in ('streaming', 'progressiveDownload'): - playlist_url = playlist.get('%sUrl' % key) + playlist_url = playlist.get(f'{key}Url') if not playlist_url: continue try: @@ -1035,7 +1035,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227 group_id = self._search_regex( - r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX, + rf'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})', webpage, 'group id', default=None) if group_id: return self.url_result( @@ -1043,9 +1043,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # single video story (e.g. 
http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) programme_id = self._search_regex( - [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX, - r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX, - r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX], + [rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"', + rf'<param[^>]+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"', + rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'], webpage, 'vpid', default=None) if programme_id: @@ -1142,7 +1142,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE video_id, url_transparent=True) entry.update({ 'timestamp': traverse_obj(morph_payload, ( - 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}) + 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}), ), **traverse_obj(video_data, { 'thumbnail': (('iChefImage', 'image'), {url_or_none}, any), @@ -1189,7 +1189,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}), 'start_time': ('offset', 'start', {float_or_none}), 'end_time': ('offset', 'end', {float_or_none}), - }) + }), ), } @@ -1287,7 +1287,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any), 'duration': ('versions', 0, 'duration', {int}), 'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}), - }) + }), } def is_type(*types): @@ -1331,7 +1331,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if blocks: summary = [] for block in blocks: - text = try_get(block, lambda x: x['model']['text'], compat_str) + text = try_get(block, lambda x: x['model']['text'], str) if text: summary.append(text) if summary: @@ -1411,9 +1411,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE entries, playlist_id, playlist_title, 
playlist_description) def extract_all(pattern): - return list(filter(None, map( - lambda s: self._parse_json(s, playlist_id, fatal=False), - re.findall(pattern, webpage)))) + return list(filter(None, ( + self._parse_json(s, playlist_id, fatal=False) + for s in re.findall(pattern, webpage)))) # US accessed article with single embedded video (e.g. # https://www.bbc.com/news/uk-68546268) @@ -1435,14 +1435,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # Multiple video article (e.g. # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460) - EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX + EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?' entries = [] for match in extract_all(r'new\s+SMP\(({.+?})\)'): embed_url = match.get('playerSettings', {}).get('externalEmbedUrl') if embed_url and re.match(EMBED_URL, embed_url): entries.append(embed_url) entries.extend(re.findall( - r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage)) + rf'setPlaylist\("({EMBED_URL})"\)', webpage)) if entries: return self.playlist_result( [self.url_result(entry_, 'BBCCoUk') for entry_ in entries], @@ -1492,11 +1492,11 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE video_id = media_meta.get('externalId') if not video_id: - video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num) + video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}' title = media_meta.get('caption') if not title: - title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num) + title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}' duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration')) @@ -1557,8 +1557,8 @@ class BBCCoUkArticleIE(InfoExtractor): class BBCCoUkPlaylistBaseIE(InfoExtractor): def _entries(self, webpage, url, playlist_id): - single_page = 
'page' in compat_urlparse.parse_qs( - compat_urlparse.urlparse(url).query) + single_page = 'page' in urllib.parse.parse_qs( + urllib.parse.urlparse(url).query) for page_num in itertools.count(2): for video_id in re.findall( self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage): @@ -1572,8 +1572,8 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): if not next_page: break webpage = self._download_webpage( - compat_urlparse.urljoin(url, next_page), playlist_id, - 'Downloading page %d' % page_num, page_num) + urllib.parse.urljoin(url, next_page), playlist_id, + f'Downloading page {page_num}', page_num) def _real_extract(self, url): playlist_id = self._match_id(url) @@ -1588,7 +1588,7 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor): - _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX + _VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P<id>{BBCCoUkIE._ID_REGEX})' @staticmethod def _get_default(episode, key, default_key='default'): @@ -1712,11 +1712,11 @@ class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE): variables['sliceId'] = series_id return self._download_json( 'https://graph.ibl.api.bbc.co.uk/', pid, headers={ - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', }, data=json.dumps({ 'id': '5692d93d5aac8d796a0305e895e61551', 'variables': variables, - }).encode('utf-8'))['data']['programme'] + }).encode())['data']['programme'] @staticmethod def _get_playlist_data(data): @@ -1776,7 +1776,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE): def _call_api(self, pid, per_page, page=1, series_id=None): return self._download_json( - 'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid, + f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes', pid, query={ 'page': page, 'per_page': per_page, @@ -1792,7 +1792,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE): class 
BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): IE_NAME = 'bbc.co.uk:playlist' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX + _VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)' _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s' _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)' _TESTS = [{ diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py index 0aecbd0..acc8d12 100644 --- a/yt_dlp/extractor/beatport.py +++ b/yt_dlp/extractor/beatport.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import int_or_none @@ -33,7 +32,7 @@ class BeatportIE(InfoExtractor): 'display_id': 'birds-original-mix', 'ext': 'mp4', 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)", - } + }, }] def _real_extract(self, url): @@ -51,7 +50,7 @@ class BeatportIE(InfoExtractor): track = next(t for t in playables['tracks'] if t['id'] == int(track_id)) - title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name'] + title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name'] if track['mix']: title += ' (' + track['mix'] + ')' @@ -89,7 +88,7 @@ class BeatportIE(InfoExtractor): images.append(image) return { - 'id': compat_str(track.get('id')) or track_id, + 'id': str(track.get('id')) or track_id, 'display_id': track.get('slug') or display_id, 'title': title, 'formats': formats, diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index da98ac3..960cdfa 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -23,7 +23,7 @@ class BeegIE(InfoExtractor): 'upload_date': '20220131', 'timestamp': 1643656455, 'display_id': '2540839', - } + }, }, { 'url': 'https://beeg.com/-0599050563103750?t=4-861', 'md5': 'bd8b5ea75134f7f07fad63008db2060e', @@ -38,7 +38,7 @@ class BeegIE(InfoExtractor): 'timestamp': 1643623200, 
'display_id': '2569965', 'upload_date': '20220131', - } + }, }, { # api/v6 v2 'url': 'https://beeg.com/1941093077?t=911-1391', @@ -55,8 +55,8 @@ class BeegIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video = self._download_json( - 'https://store.externulls.com/facts/file/%s' % video_id, - video_id, 'Downloading JSON for %s' % video_id) + f'https://store.externulls.com/facts/file/{video_id}', + video_id, f'Downloading JSON for {video_id}') fc_facts = video.get('fc_facts') first_fact = {} diff --git a/yt_dlp/extractor/behindkink.py b/yt_dlp/extractor/behindkink.py index 9d2324f..45f45d0 100644 --- a/yt_dlp/extractor/behindkink.py +++ b/yt_dlp/extractor/behindkink.py @@ -16,7 +16,7 @@ class BehindKinkIE(InfoExtractor): 'upload_date': '20141205', 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg', 'age_limit': 18, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py index 677680b..ac45dd4 100644 --- a/yt_dlp/extractor/bellmedia.py +++ b/yt_dlp/extractor/bellmedia.py @@ -86,6 +86,6 @@ class BellMediaIE(InfoExtractor): return { '_type': 'url_transparent', 'id': video_id, - 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id), + 'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}', 'ie_key': 'NineCNineMedia', } diff --git a/yt_dlp/extractor/berufetv.py b/yt_dlp/extractor/berufetv.py index 8160cbd..5bba33a 100644 --- a/yt_dlp/extractor/berufetv.py +++ b/yt_dlp/extractor/berufetv.py @@ -16,7 +16,7 @@ class BerufeTVIE(InfoExtractor): 'tags': ['Studienfilm'], 'duration': 602.440, 'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$', - } + }, }] def _real_extract(self, url): @@ -54,7 +54,7 @@ class BerufeTVIE(InfoExtractor): subtitles.setdefault(track['language'], []).append({ 'url': track['source'], 'name': track.get('label'), - 
'ext': 'vtt' + 'ext': 'vtt', }) return { diff --git a/yt_dlp/extractor/bet.py b/yt_dlp/extractor/bet.py index cbf3dd0..3a8e743 100644 --- a/yt_dlp/extractor/bet.py +++ b/yt_dlp/extractor/bet.py @@ -19,7 +19,7 @@ class BetIE(MTVServicesInfoExtractor): 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', - } + }, }, 'params': { # rtmp download @@ -39,16 +39,16 @@ class BetIE(MTVServicesInfoExtractor): 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', - } + }, }, 'params': { # rtmp download 'skip_download': True, }, - } + }, ] - _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" + _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/bet-mrss-player' def _get_feed_query(self, uri): return { diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py index c4621ca..87f0117 100644 --- a/yt_dlp/extractor/bfmtv.py +++ b/yt_dlp/extractor/bfmtv.py @@ -98,8 +98,8 @@ class BFMTVArticleIE(BFMTVBaseIE): 'timestamp': 1673341692, 'duration': 109.269, 'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'], - 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg' - } + 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py index 02d1ba0..9c55bb9 100644 --- a/yt_dlp/extractor/bigflix.py +++ b/yt_dlp/extractor/bigflix.py @@ -1,10 +1,8 @@ +import base64 import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote, -) class BigflixIE(InfoExtractor): @@ -21,7 +19,7 @@ class BigflixIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, 
{ # multiple formats 'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967', @@ -38,7 +36,7 @@ class BigflixIE(InfoExtractor): webpage, 'title') def decode_url(quoted_b64_url): - return compat_b64decode(compat_urllib_parse_unquote( + return base64.b64decode(urllib.parse.unquote( quoted_b64_url)).decode('utf-8') formats = [] @@ -47,7 +45,7 @@ class BigflixIE(InfoExtractor): video_url = decode_url(encoded_url) f = { 'url': video_url, - 'format_id': '%sp' % height, + 'format_id': f'{height}p', 'height': int(height), } if video_url.startswith('rtmp'): @@ -69,5 +67,5 @@ class BigflixIE(InfoExtractor): 'id': video_id, 'title': title, 'description': description, - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py index acf78e4..b1c230f 100644 --- a/yt_dlp/extractor/bigo.py +++ b/yt_dlp/extractor/bigo.py @@ -36,7 +36,7 @@ class BigoIE(InfoExtractor): raise ExtractorError('Received invalid JSON data') if info_raw.get('code'): raise ExtractorError( - 'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True) + 'Bigo says: {} (code {})'.format(info_raw.get('msg'), info_raw.get('code')), expected=True) info = info_raw.get('data') or {} if not info.get('alive'): diff --git a/yt_dlp/extractor/bild.py b/yt_dlp/extractor/bild.py index eb28932..2ba6370 100644 --- a/yt_dlp/extractor/bild.py +++ b/yt_dlp/extractor/bild.py @@ -20,7 +20,7 @@ class BildIE(InfoExtractor): 'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 196, - } + }, }, { 'note': 'static MP4 and HLS', 'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html', @@ -32,7 +32,7 @@ class BildIE(InfoExtractor): 'description': 'md5:709b543c24dc31bbbffee73bccda34ad', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 69, - } + }, }] def _real_extract(self, url): diff --git 
a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index b38c90b..a84b7a6 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -31,12 +31,12 @@ from ..utils import ( mimetype2ext, parse_count, parse_qs, + parse_resolution, qualities, smuggle_url, srt_subtitles_timecode, str_or_none, traverse_obj, - try_call, unified_timestamp, unsmuggle_url, url_or_none, @@ -47,6 +47,23 @@ from ..utils import ( class BilibiliBaseIE(InfoExtractor): _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?') + _WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session + _wbi_key_cache = {} + + @property + def is_logged_in(self): + return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA')) + + def _check_missing_formats(self, play_info, formats): + parsed_qualities = set(traverse_obj(formats, (..., 'quality'))) + missing_formats = join_nonempty(*[ + traverse_obj(fmt, 'new_description', 'display_desc', 'quality') + for fmt in traverse_obj(play_info, ( + 'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ') + if missing_formats: + self.to_screen( + f'Format(s) {missing_formats} are missing; you have to login or ' + f'become a premium member to download them. {self._login_hint()}') def extract_formats(self, play_info): format_names = { @@ -86,18 +103,75 @@ class BilibiliBaseIE(InfoExtractor): 'format': format_names.get(video.get('id')), } for video in traverse_obj(play_info, ('dash', 'video', ...))) - missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality'))) - if missing_formats: - self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; ' - f'you have to login or become premium member to download them. 
{self._login_hint()}') + if formats: + self._check_missing_formats(play_info, formats) + fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), { + 'url': ('url', {url_or_none}), + 'duration': ('length', {functools.partial(float_or_none, scale=1000)}), + 'filesize': ('size', {int_or_none}), + })) + if fragments: + formats.append({ + 'url': fragments[0]['url'], + 'filesize': sum(traverse_obj(fragments, (..., 'filesize'))), + **({ + 'fragments': fragments, + 'protocol': 'http_dash_segments', + } if len(fragments) > 1 else {}), + **traverse_obj(play_info, { + 'quality': ('quality', {int_or_none}), + 'format_id': ('quality', {str_or_none}), + 'format_note': ('quality', {lambda x: format_names.get(x)}), + 'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}), + }), + **parse_resolution(format_names.get(play_info.get('quality'))), + }) return formats - def _download_playinfo(self, video_id, cid, headers=None): + def _get_wbi_key(self, video_id): + if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT: + return self._wbi_key_cache['key'] + + session_data = self._download_json( + 'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign') + + lookup = ''.join(traverse_obj(session_data, ( + 'data', 'wbi_img', ('img_url', 'sub_url'), + {lambda x: x.rpartition('/')[2].partition('.')[0]}))) + + # from getMixinKey() in the vendor js + mixin_key_enc_tab = [ + 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, + 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, + 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, + 36, 20, 34, 44, 52, + ] + + self._wbi_key_cache.update({ + 'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32], + 'ts': time.time(), + }) + return self._wbi_key_cache['key'] + + def _sign_wbi(self, params, video_id): + params['wts'] = round(time.time()) + params = { + k: ''.join(filter(lambda 
char: char not in "!'()*", str(v))) + for k, v in sorted(params.items()) + } + query = urllib.parse.urlencode(params) + params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest() + return params + + def _download_playinfo(self, bvid, cid, headers=None, qn=None): + params = {'bvid': bvid, 'cid': cid, 'fnval': 4048} + if qn: + params['qn'] = qn return self._download_json( - 'https://api.bilibili.com/x/player/playurl', video_id, - query={'bvid': video_id, 'cid': cid, 'fnval': 4048}, - note=f'Downloading video formats for cid {cid}', headers=headers)['data'] + 'https://api.bilibili.com/x/player/wbi/playurl', bvid, + query=self._sign_wbi(params, bvid), headers=headers, + note=f'Downloading video formats for cid {cid} {qn or ""}')['data'] def json2srt(self, json_data): srt_data = '' @@ -112,21 +186,21 @@ class BilibiliBaseIE(InfoExtractor): 'danmaku': [{ 'ext': 'xml', 'url': f'https://comment.bilibili.com/{cid}.xml', - }] + }], } - subtitle_info = traverse_obj(self._download_json( + video_info = self._download_json( 'https://api.bilibili.com/x/player/v2', video_id, query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid}, - note=f'Extracting subtitle info {cid}'), ('data', 'subtitle')) - subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan'])) - if not subs_list and traverse_obj(subtitle_info, 'allow_submit'): - if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie - self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True) - for s in subs_list: + note=f'Extracting subtitle info {cid}') + if traverse_obj(video_info, ('data', 'need_login_subtitle')): + self.report_warning( + f'Subtitles are only available when logged in. 
{self._login_hint()}', only_once=True) + for s in traverse_obj(video_info, ( + 'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])): subtitles.setdefault(s['lan'], []).append({ 'ext': 'srt', - 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)) + 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)), }) return subtitles @@ -203,19 +277,19 @@ class BilibiliBaseIE(InfoExtractor): self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges) return cid_edges - def _get_interactive_entries(self, video_id, cid, metainfo): + def _get_interactive_entries(self, video_id, cid, metainfo, headers=None): graph_version = traverse_obj( self._download_json( 'https://api.bilibili.com/x/player/wbi/v2', video_id, - 'Extracting graph version', query={'bvid': video_id, 'cid': cid}), + 'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers), ('data', 'interaction', 'graph_version', {int_or_none})) cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1) for cid, edges in cid_edges.items(): - play_info = self._download_playinfo(video_id, cid) + play_info = self._download_playinfo(video_id, cid, headers=headers) yield { **metainfo, 'id': f'{video_id}_{cid}', - 'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}', + 'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}', 'formats': self.extract_formats(play_info), 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}', 'duration': float_or_none(play_info.get('timelength'), scale=1000), @@ -243,17 +317,17 @@ class BiliBiliIE(BilibiliBaseIE): 'timestamp': 1488353834, 'like_count': int, 'view_count': int, + '_old_archive_ids': ['bilibili 8903802_part1'], }, }, { 'note': 'old av URL version', 'url': 'http://www.bilibili.com/video/av1074402/', 'info_dict': { - 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$', + 'id': 
'BV11x411K7CN', 'ext': 'mp4', + 'title': '【金坷垃】金泡沫', 'uploader': '菊子桑', 'uploader_id': '156160', - 'id': 'BV11x411K7CN', - 'title': '【金坷垃】金泡沫', 'duration': 308.36, 'upload_date': '20140420', 'timestamp': 1397983878, @@ -262,6 +336,8 @@ class BiliBiliIE(BilibiliBaseIE): 'comment_count': int, 'view_count': int, 'tags': list, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$', + '_old_archive_ids': ['bilibili 1074402_part1'], }, 'params': {'skip_download': True}, }, { @@ -269,7 +345,7 @@ class BiliBiliIE(BilibiliBaseIE): 'url': 'https://www.bilibili.com/video/BV1bK411W797', 'info_dict': { 'id': 'BV1bK411W797', - 'title': '物语中的人物是如何吐槽自己的OP的' + 'title': '物语中的人物是如何吐槽自己的OP的', }, 'playlist_count': 18, 'playlist': [{ @@ -288,8 +364,9 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, - } - }] + '_old_archive_ids': ['bilibili 498159642_part1'], + }, + }], }, { 'note': 'Specific page of Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', @@ -308,28 +385,8 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, - } - }, { - 'note': 'video has subtitles', - 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh', - 'info_dict': { - 'id': 'BV12N4y1M7rh', - 'ext': 'mp4', - 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1', - 'tags': list, - 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4', - 'duration': 313.557, - 'upload_date': '20220709', - 'uploader': '小夫太渴', - 'timestamp': 1657347907, - 'uploader_id': '1326814124', - 'comment_count': int, - 'view_count': int, - 'like_count': int, - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', - 'subtitles': 'count:2' + '_old_archive_ids': ['bilibili 498159642_part1'], }, - 'params': {'listsubtitles': True}, }, { 'url': 'https://www.bilibili.com/video/av8903802/', 'info_dict': { @@ -347,6 +404,7 @@ class BiliBiliIE(BilibiliBaseIE): 'comment_count': int, 'view_count': int, 
'like_count': int, + '_old_archive_ids': ['bilibili 8903802_part1'], }, 'params': { 'skip_download': True, @@ -370,6 +428,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 463665680_part1'], }, 'params': {'skip_download': True}, }, { @@ -388,8 +447,8 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 893839363_part1'], }, - 'params': {'skip_download': True}, }, { 'note': 'newer festival video', 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f', @@ -406,8 +465,57 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 778246196_part1'], + }, + }, { + 'note': 'legacy flv/mp4 video', + 'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4', + 'info_dict': { + 'id': 'BV1ms411Q7vw_p4', + 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛', + 'timestamp': 1458222815, + 'upload_date': '20160317', + 'description': '云南方言快乐生产线出品', + 'duration': float, + 'uploader': '一笑颠天', + 'uploader_id': '3916081', + 'view_count': int, + 'comment_count': int, + 'like_count': int, + 'tags': list, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 4120229_part4'], + }, + 'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}}, + 'playlist_count': 19, + 'playlist': [{ + 'info_dict': { + 'id': 'BV1ms411Q7vw_p4_0', + 'ext': 'flv', + 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛', + 'duration': 399.102, + }, + }], + }, { + 'note': 'legacy mp4-only video', + 'url': 'https://www.bilibili.com/video/BV1nx411u79K', + 'info_dict': { + 'id': 'BV1nx411u79K', + 'ext': 'mp4', + 'title': '【练习室】201603声乐练习《No Air》with VigoVan', + 'timestamp': 1508893551, + 'upload_date': '20171025', + 'description': '@ZERO-G伯远\n声乐练习 
《No Air》with Vigo Van', + 'duration': 80.384, + 'uploader': '伯远', + 'uploader_id': '10584494', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'tags': list, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 15700301_part1'], }, - 'params': {'skip_download': True}, }, { 'note': 'interactive/split-path video', 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/', @@ -425,6 +533,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 292734508_part1'], }, 'playlist_count': 33, 'playlist': [{ @@ -443,6 +552,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + '_old_archive_ids': ['bilibili 292734508_part1'], }, }], }, { @@ -466,6 +576,29 @@ class BiliBiliIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }, { + 'note': 'video has subtitles, which requires login', + 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh', + 'info_dict': { + 'id': 'BV12N4y1M7rh', + 'ext': 'mp4', + 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1', + 'tags': list, + 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4', + 'duration': 313.557, + 'upload_date': '20220709', + 'uploader': '小夫太渴', + 'timestamp': 1657347907, + 'uploader_id': '1326814124', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + 'subtitles': 'count:2', # login required for CC subtitle + '_old_archive_ids': ['bilibili 898179753_part1'], + }, + 'params': {'listsubtitles': True}, + 'skip': 'login required for subtitle', + }, { 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/', 'info_dict': { 'id': 'BV1jL41167ZG', @@ -498,8 +631,9 @@ class BiliBiliIE(BilibiliBaseIE): if not self._match_valid_url(urlh.url): return self.url_result(urlh.url) - initial_state = 
self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + headers['Referer'] = url + initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) is_festival = 'videoData' not in initial_state if is_festival: video_data = initial_state['videoInfo'] @@ -548,7 +682,6 @@ class BiliBiliIE(BilibiliBaseIE): aid = video_data.get('aid') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') - cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid') festival_info = {} @@ -586,19 +719,65 @@ class BiliBiliIE(BilibiliBaseIE): is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate')) if is_interactive: return self.playlist_result( - self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{ - 'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), - '__post_extractor': self.extract_comments(aid), - }) + self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo, + duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), + __post_extractor=self.extract_comments(aid)) else: - return { - **metainfo, - 'duration': float_or_none(play_info.get('timelength'), scale=1000), - 'chapters': self._get_chapters(aid, cid), - 'subtitles': self.extract_subtitles(video_id, cid), - 'formats': self.extract_formats(play_info), - '__post_extractor': self.extract_comments(aid), - } + formats = self.extract_formats(play_info) + + if not traverse_obj(play_info, ('dash')): + # we only have legacy formats and need additional work + has_qn = lambda x: x in traverse_obj(formats, (..., 'quality')) + for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})): + formats.extend(traverse_obj( + self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)), + lambda _, v: not has_qn(v['quality']))) + 
self._check_missing_formats(play_info, formats) + flv_formats = traverse_obj(formats, lambda _, v: v['fragments']) + if flv_formats and len(flv_formats) < len(formats): + # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one + if not self._configuration_arg('prefer_multi_flv'): + dropped_fmts = ', '.join( + f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats) + formats = traverse_obj(formats, lambda _, v: not v.get('fragments')) + if dropped_fmts: + self.to_screen( + f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. ' + 'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"') + else: + formats = traverse_obj( + # XXX: Filtering by extractor-arg is for testing purposes + formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]), + ) or [max(flv_formats, key=lambda x: x['quality'])] + + if traverse_obj(formats, (0, 'fragments')): + # We have flv formats, which are individual short videos with their own timestamps and metainfo + # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround + return { + **metainfo, + '_type': 'multi_video', + 'entries': [{ + 'id': f'{metainfo["id"]}_{idx}', + 'title': metainfo['title'], + 'http_headers': metainfo['http_headers'], + 'formats': [{ + **fragment, + 'format_id': formats[0].get('format_id'), + }], + 'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None, + '__post_extractor': self.extract_comments(aid) if idx == 0 else None, + } for idx, fragment in enumerate(formats[0]['fragments'])], + 'duration': float_or_none(play_info.get('timelength'), scale=1000), + } + else: + return { + **metainfo, + 'formats': formats, + 'duration': float_or_none(play_info.get('timelength'), scale=1000), + 'chapters': self._get_chapters(aid, cid), + 'subtitles': self.extract_subtitles(video_id, cid), + '__post_extractor': self.extract_comments(aid), + } class BiliBiliBangumiIE(BilibiliBaseIE): @@ 
-640,7 +819,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE): 'duration': 1425.256, 'timestamp': 1554566400, 'upload_date': '20190406', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, 'skip': 'Geo-restricted', }, { @@ -661,7 +840,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE): 'duration': 1922.129, 'timestamp': 1602853860, 'upload_date': '20201016', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }] @@ -764,7 +943,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE): 'duration': 1525.777, 'timestamp': 1425074413, 'upload_date': '20150227', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], }] @@ -794,7 +973,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'title': '鬼灭之刃', 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b', }, - 'playlist_mincount': 26 + 'playlist_mincount': 26, }, { 'url': 'https://www.bilibili.com/bangumi/play/ss2251', 'info_dict': { @@ -819,7 +998,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'duration': 1436.992, 'timestamp': 1343185080, 'upload_date': '20120725', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], }] @@ -906,7 +1085,7 @@ class BilibiliCheeseIE(BilibiliCheeseBaseIE): 'upload_date': '20230924', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'view_count': int, - } + }, }] def _real_extract(self, url): @@ -939,7 +1118,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE): 'upload_date': '20230924', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'view_count': int, - } + }, }], 'params': {'playlist_items': '1'}, }, { @@ -969,7 +1148,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE): })) -class BilibiliSpaceBaseIE(InfoExtractor): +class BilibiliSpaceBaseIE(BilibiliBaseIE): def _extract_playlist(self, fetch_page, get_metadata, get_entries): first_page = fetch_page(0) metadata = 
get_metadata(first_page) @@ -989,73 +1168,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE): 'id': '3985676', }, 'playlist_mincount': 178, + 'skip': 'login required', }, { 'url': 'https://space.bilibili.com/313580179/video', 'info_dict': { 'id': '313580179', }, 'playlist_mincount': 92, + 'skip': 'login required', }] - def _extract_signature(self, playlist_id): - session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False) - - key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0] - img_key = traverse_obj( - session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100' - sub_key = traverse_obj( - session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6' - - session_key = img_key + sub_key - - signature_values = [] - for position in ( - 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, - 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, - 57, 62, 11, 36, 20, 34, 44, 52 - ): - char_at_position = try_call(lambda: session_key[position]) - if char_at_position: - signature_values.append(char_at_position) - - return ''.join(signature_values)[:32] - def _real_extract(self, url): playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video') if not is_video_url: self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. 
' 'To download audios, add a "/audio" to the URL') - signature = self._extract_signature(playlist_id) - def fetch_page(page_idx): query = { 'keyword': '', 'mid': playlist_id, - 'order': 'pubdate', + 'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate', 'order_avoided': 'true', 'platform': 'web', 'pn': page_idx + 1, 'ps': 30, 'tid': 0, 'web_location': 1550101, - 'wts': int(time.time()), } - query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest() try: - response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search', - playlist_id, note=f'Downloading page {page_idx}', query=query, - headers={'referer': url}) + response = self._download_json( + 'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id, + query=self._sign_wbi(query, playlist_id), + note=f'Downloading space page {page_idx}', headers={'Referer': url}) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 412: raise ExtractorError( 'Request is blocked by server (412), please add cookies, wait and try later.', expected=True) raise - if response['code'] in (-352, -401): + status_code = response['code'] + if status_code == -401: raise ExtractorError( - f'Request is blocked by server ({-response["code"]}), ' - 'please add cookies, wait and try later.', expected=True) + 'Request is blocked by server (401), please add cookies, wait and try later.', expected=True) + elif status_code == -352 and not self.is_logged_in: + self.raise_login_required('Request is rejected, you need to login to access playlist') + elif status_code != 0: + raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}') return response['data'] def get_metadata(page_data): @@ -1163,7 +1322,7 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE): 'uploader_id': ('meta', 'mid', {str_or_none}), 'timestamp': ('meta', 'ptime', {int_or_none}), 'thumbnail': ('meta', 'cover', {url_or_none}), - }) + }), 
} def get_entries(page_data): @@ -1195,7 +1354,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE): mid, sid = self._match_valid_url(url).group('mid', 'sid') playlist_id = f'{mid}_{sid}' playlist_meta = traverse_obj(self._download_json( - f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False + f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False, ), { 'title': ('data', 'meta', 'name', {str}), 'description': ('data', 'meta', 'description', {str}), @@ -1217,7 +1376,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE): 'page_count': math.ceil(entry_count / page_size), 'page_size': page_size, 'uploader': self._get_uploader(mid, playlist_id), - **playlist_meta + **playlist_meta, } def get_entries(page_data): @@ -1241,7 +1400,7 @@ class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE): 'upload_date': '20201109', 'modified_timestamp': int, 'modified_date': str, - 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg", + 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg', 'view_count': int, 'like_count': int, }, @@ -1281,7 +1440,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE): _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)' _TESTS = [{ 'url': 'https://www.bilibili.com/watchlater/#/list', - 'info_dict': {'id': 'watchlater'}, + 'info_dict': { + 'id': r're:\d+', + 'title': '稍后再看', + }, 'playlist_mincount': 0, 'skip': 'login required', }] @@ -1345,7 +1507,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'uploader_id': '84912', 'timestamp': 1604905176, 'upload_date': '20201109', - 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg", + 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg', }, 'playlist_mincount': 22, }, { @@ -1357,21 +1519,26 @@ class 
BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'skip': 'redirect url', }, { 'url': 'https://www.bilibili.com/list/watchlater', - 'info_dict': {'id': 'watchlater'}, + 'info_dict': { + 'id': r're:2_\d+', + 'title': '稍后再看', + 'uploader': str, + 'uploader_id': str, + }, 'playlist_mincount': 0, 'skip': 'login required', }, { 'url': 'https://www.bilibili.com/medialist/play/watchlater', 'info_dict': {'id': 'watchlater'}, 'playlist_mincount': 0, - 'skip': 'login required', + 'skip': 'redirect url & login required', }] def _extract_medialist(self, query, list_id): for page_num in itertools.count(1): page_data = self._download_json( 'https://api.bilibili.com/x/v2/medialist/resource/list', - list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}' + list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}', )['data'] yield from self._get_entries(page_data, 'media_list', ending_key='bv_id') query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id')) @@ -1407,7 +1574,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'tid': ('tid', {int_or_none}), 'sort_field': ('sortFiled', {int_or_none}), 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}), - }) + }), } metadata = { 'id': f'{query["type"]}_{query["biz_id"]}', @@ -1415,7 +1582,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'title': ('title', {str}), 'uploader': ('upper', 'name', {str}), 'uploader_id': ('upper', 'mid', {str_or_none}), - 'timestamp': ('ctime', {int_or_none}), + 'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}), 'thumbnail': ('cover', {url_or_none}), })), } @@ -1430,26 +1597,26 @@ class BilibiliCategoryIE(InfoExtractor): 'url': 'https://www.bilibili.com/v/kichiku/mad', 'info_dict': { 'id': 'kichiku: mad', - 'title': 'kichiku: mad' + 'title': 'kichiku: mad', }, 'playlist_mincount': 45, 'params': { - 'playlistend': 45 - } + 'playlistend': 45, + }, }] def _fetch_page(self, api_url, num_pages, query, page_num): parsed_json = 
self._download_json( api_url, query, query={'Search_key': query, 'pn': page_num}, - note='Extracting results from page %s of %s' % (page_num, num_pages)) + note=f'Extracting results from page {page_num} of {num_pages}') video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list) if not video_list: - raise ExtractorError('Failed to retrieve video list for page %d' % page_num) + raise ExtractorError(f'Failed to retrieve video list for page {page_num}') for video in video_list: yield self.url_result( - 'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid']) + 'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid']) def _entries(self, category, subcategory, query): # map of categories : subcategories : RIDs @@ -1459,7 +1626,7 @@ class BilibiliCategoryIE(InfoExtractor): 'manual_vocaloid': 126, 'guide': 22, 'theatre': 216, - 'course': 127 + 'course': 127, }, } @@ -1485,7 +1652,7 @@ class BilibiliCategoryIE(InfoExtractor): def _real_extract(self, url): category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4] - query = '%s: %s' % (category, subcategory) + query = f'{category}: {subcategory}' return self.playlist_result(self._entries(category, subcategory, query), query, query) @@ -1588,7 +1755,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): formats = [{ 'url': play_data['cdns'][0], 'filesize': int_or_none(play_data.get('size')), - 'vcodec': 'none' + 'vcodec': 'none', }] for a_format in formats: @@ -1606,7 +1773,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): subtitles = { 'origin': [{ 'url': lyric, - }] + }], } return { @@ -1674,7 +1841,7 @@ class BiliBiliPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) return self.url_result( - 'http://www.bilibili.tv/video/av%s/' % video_id, + f'http://www.bilibili.tv/video/av{video_id}/', ie=BiliBiliIE.ie_key(), video_id=video_id) @@ -1702,11 +1869,10 @@ class BiliIntlBaseIE(InfoExtractor): return 
json.get('data') def json2srt(self, json): - data = '\n\n'.join( + return '\n\n'.join( f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}' for i, line in enumerate(traverse_obj(json, ( 'body', lambda _, l: l['content'] and l['from'] and l['to'])))) - return data def _get_subtitles(self, *, ep_id=None, aid=None): sub_json = self._call_api( @@ -1808,14 +1974,15 @@ class BiliIntlBaseIE(InfoExtractor): note='Downloading login key', errnote='Unable to download login key')['data'] public_key = Cryptodome.RSA.importKey(key_data['key']) - password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8')) + password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode()) login_post = self._download_json( - 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({ + 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, + data=urlencode_postdata({ 'username': username, 'password': base64.b64encode(password_hash).decode('ascii'), 'keep_me': 'true', 's_locale': 'en_US', - 'isTrusted': 'true' + 'isTrusted': 'true', }), note='Logging in', errnote='Unable to log in') if login_post.get('code'): if login_post.get('message'): @@ -1842,17 +2009,17 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 76.242, - 'title': '<Untitled Chapter 1>' + 'title': '<Untitled Chapter 1>', }, { 'start_time': 76.242, 'end_time': 161.161, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1325.742, 'end_time': 1403.903, - 'title': 'Outro' + 'title': 'Outro', }], - } + }, }, { # Non-Bstation page 'url': 'https://www.bilibili.tv/en/play/1033760/11005006', @@ -1869,17 +2036,17 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 88.0, - 'title': '<Untitled Chapter 1>' + 'title': '<Untitled Chapter 1>', }, { 
'start_time': 88.0, 'end_time': 156.0, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1173.0, 'end_time': 1259.535, - 'title': 'Outro' + 'title': 'Outro', }], - } + }, }, { # Subtitle with empty content 'url': 'https://www.bilibili.tv/en/play/1005144/10131790', @@ -1890,7 +2057,7 @@ class BiliIntlIE(BiliIntlBaseIE): 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'episode_number': 140, }, - 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.' + 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.', }, { # episode comment extraction 'url': 'https://www.bilibili.tv/en/play/34580/340317', @@ -1908,20 +2075,20 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 61.0, - 'title': '<Untitled Chapter 1>' + 'title': '<Untitled Chapter 1>', }, { 'start_time': 61.0, 'end_time': 134.0, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1290.0, 'end_time': 1379.0, - 'title': 'Outro' + 'title': 'Outro', }], }, 'params': { - 'getcomments': True - } + 'getcomments': True, + }, }, { # user generated content comment extraction 'url': 'https://www.bilibili.tv/en/video/2045730385', @@ -1936,8 +2103,8 @@ class BiliIntlIE(BiliIntlBaseIE): 'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg', }, 'params': { - 'getcomments': True - } + 'getcomments': True, + }, }, { # episode id without intro and outro 'url': 'https://www.bilibili.tv/en/play/1048837/11246489', @@ -1992,7 +2159,7 @@ class BiliIntlIE(BiliIntlBaseIE): # Non-Bstation layout, read through episode list season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) video_data = traverse_obj(season_json, ( - 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id + 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id, ), expected_type=dict, 
get_all=False) # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found @@ -2024,7 +2191,7 @@ class BiliIntlIE(BiliIntlBaseIE): 'id': replies.get('rpid'), 'like_count': int_or_none(replies.get('like_count')), 'parent': replies.get('parent'), - 'timestamp': unified_timestamp(replies.get('ctime_text')) + 'timestamp': unified_timestamp(replies.get('ctime_text')), } if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')): @@ -2077,11 +2244,11 @@ class BiliIntlIE(BiliIntlBaseIE): chapters = [{ 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000), 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000), - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000), 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000), - 'title': 'Outro' + 'title': 'Outro', }] return { @@ -2137,12 +2304,13 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): episode_id = str(episode['episode_id']) yield self.url_result(smuggle_url( BiliIntlIE._make_url(episode_id, series_id), - self._parse_video_metadata(episode) + self._parse_video_metadata(episode), ), BiliIntlIE, episode_id) def _real_extract(self, url): series_id = self._match_id(url) - series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {} + series_info = self._call_api( + f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {} return self.playlist_result( self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'), categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none), @@ -2156,19 +2324,19 @@ class BiliLiveIE(InfoExtractor): 'url': 'https://live.bilibili.com/196', 'info_dict': { 'id': '33989', - 
'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)", + 'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)', 'ext': 'flv', - 'title': "太空狼人杀联动,不被爆杀就算赢", - 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg", + 'title': '太空狼人杀联动,不被爆杀就算赢', + 'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg', 'timestamp': 1650802769, }, - 'skip': 'not live' + 'skip': 'not live', }, { 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://live.bilibili.com/blanc/196', - 'only_matching': True + 'only_matching': True, }] _FORMATS = { @@ -2209,7 +2377,7 @@ class BiliLiveIE(InfoExtractor): raise ExtractorError('Streamer is not live', expected=True) formats = [] - for qn in self._FORMATS.keys(): + for qn in self._FORMATS: stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, { 'room_id': room_id, 'qn': qn, diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index 194bf1f..c83222e 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -24,7 +24,7 @@ from ..utils import ( class BitChuteIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)' _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})'] _TESTS = [{ 'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/', @@ -39,7 +39,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20170103', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'channel': 'BitChute', - 'channel_url': 
'https://www.bitchute.com/channel/bitchute/' + 'channel_url': 'https://www.bitchute.com/channel/bitchute/', }, }, { # test case: video with different channel and uploader @@ -55,7 +55,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20231106', 'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/', 'channel': 'Full Measure with Sharyl Attkisson', - 'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/' + 'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/', }, }, { # video not downloadable in browser, but we can recover it @@ -72,7 +72,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20181113', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'channel': 'BitChute', - 'channel_url': 'https://www.bitchute.com/channel/bitchute/' + 'channel_url': 'https://www.bitchute.com/channel/bitchute/', }, 'params': {'check_formats': None}, }, { @@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor): }, { 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent', 'only_matching': True, + }, { + 'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/', + 'only_matching': True, }] _GEO_BYPASS = False @@ -115,7 +118,7 @@ class BitChuteIE(InfoExtractor): continue return { 'url': url, - 'filesize': int_or_none(response.headers.get('Content-Length')) + 'filesize': int_or_none(response.headers.get('Content-Length')), } def _raise_if_restricted(self, webpage): @@ -132,7 +135,7 @@ class BitChuteIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) + f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) self._raise_if_restricted(webpage) publish_date = clean_html(get_element_by_class('video-publish-date', webpage)) @@ -171,13 +174,13 @@ class BitChuteIE(InfoExtractor): class BitChuteChannelIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.bitchute.com/channel/bitchute/', 'info_dict': { 'id': 'bitchute', 'title': 'BitChute', - 'description': 'md5:5329fb3866125afa9446835594a9b138', + 'description': 'md5:2134c37d64fc3a4846787c402956adac', }, 'playlist': [ { @@ -196,7 +199,7 @@ class BitChuteChannelIE(InfoExtractor): 'duration': 16, 'view_count': int, }, - } + }, ], 'params': { 'skip_download': True, @@ -209,7 +212,10 @@ class BitChuteChannelIE(InfoExtractor): 'id': 'wV9Imujxasw9', 'title': 'Bruce MacDonald and "The Light of Darkness"', 'description': 'md5:747724ef404eebdfc04277714f81863e', - } + }, + }, { + 'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/', + 'only_matching': True, }] _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7' @@ -224,13 +230,13 @@ class BitChuteChannelIE(InfoExtractor): 'container': 'playlist-video', 'title': 'title', 'description': 'description', - } + }, } @staticmethod def _make_url(playlist_id, playlist_type): - return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/' + return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/' def _fetch_page(self, playlist_id, playlist_type, page_num): playlist_url = self._make_url(playlist_id, playlist_type) diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 8f41c89..5358909 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -47,7 +47,7 @@ class BlackboardCollaborateIE(InfoExtractor): region = mobj.group('region') video_id = mobj.group('id') info = self._download_json( - 'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id) + f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id) duration = 
info.get('duration') title = info['name'] upload_date = info.get('created') diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py index aa3d63e..71b237d 100644 --- a/yt_dlp/extractor/bleacherreport.py +++ b/yt_dlp/extractor/bleacherreport.py @@ -44,7 +44,7 @@ class BleacherReportIE(InfoExtractor): def _real_extract(self, url): article_id = self._match_id(url) - article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article'] + article_data = self._download_json(f'http://api.bleacherreport.com/api/v1/articles/{article_id}', article_id)['article'] thumbnails = [] primary_photo = article_data.get('primaryPhoto') @@ -71,11 +71,11 @@ class BleacherReportIE(InfoExtractor): if video: video_type = video['type'] if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'): - info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] + info['url'] = 'http://bleacherreport.com/video_embed?id={}'.format(video['id']) elif video_type == 'youtube.com': info['url'] = video['id'] elif video_type == 'vine.co': - info['url'] = 'https://vine.co/v/%s' % video['id'] + info['url'] = 'https://vine.co/v/{}'.format(video['id']) else: info['url'] = video_type + video['id'] return info @@ -99,12 +99,12 @@ class BleacherReportCMSIE(AMPIE): }, 'expected_warnings': [ - 'Unable to download f4m manifest' - ] + 'Unable to download f4m manifest', + ], }] def _real_extract(self, url): video_id = self._match_id(url) - info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id) + info = self._extract_feed_info(f'http://vid.bleacherreport.com/videos/{video_id}.akamai') info['id'] = video_id return info diff --git a/yt_dlp/extractor/blerp.py b/yt_dlp/extractor/blerp.py index 4631ad2..f4f2248 100644 --- a/yt_dlp/extractor/blerp.py +++ b/yt_dlp/extractor/blerp.py @@ -16,7 +16,7 @@ class BlerpIE(InfoExtractor): 'uploader_id': '5fb81e51aa66ae000c395478', 'ext': 
'mp3', 'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'], - } + }, }, { 'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f', 'info_dict': { @@ -25,11 +25,11 @@ class BlerpIE(InfoExtractor): 'uploader': '179617322678353920', 'uploader_id': '5ba99cf71386730004552c42', 'ext': 'mp3', - 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'] - } + 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'], + }, }] - _GRAPHQL_OPERATIONNAME = "webBitePageGetBite" + _GRAPHQL_OPERATIONNAME = 'webBitePageGetBite' _GRAPHQL_QUERY = ( '''query webBitePageGetBite($_id: MongoID!) { web { @@ -141,27 +141,26 @@ class BlerpIE(InfoExtractor): 'operationName': self._GRAPHQL_OPERATIONNAME, 'query': self._GRAPHQL_QUERY, 'variables': { - '_id': audio_id - } + '_id': audio_id, + }, } headers = { - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', } - json_result = self._download_json('https://api.blerp.com/graphql', - audio_id, data=json.dumps(data).encode('utf-8'), headers=headers) + json_result = self._download_json( + 'https://api.blerp.com/graphql', audio_id, + data=json.dumps(data).encode(), headers=headers) bite_json = json_result['data']['web']['biteById'] - info_dict = { + return { 'id': bite_json['_id'], 'url': bite_json['audio']['mp3']['url'], 'title': bite_json['title'], 'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none), 'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none), 'ext': 'mp3', - 'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None) + 'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None), } - - return info_dict diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index ef0151d..1614b6f 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ 
-21,14 +21,14 @@ class BloggerIE(InfoExtractor): 'ext': 'mp4', 'thumbnail': r're:^https?://.*', 'duration': 76.068, - } + }, }] def _real_extract(self, url): token_id = self._match_id(url) webpage = self._download_webpage(url, token_id) data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data') - data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id) + data = self._parse_json(data_json.encode().decode('unicode_escape'), token_id) streams = data['streams'] formats = [{ 'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))), diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py index 792155e..ec6b7a8 100644 --- a/yt_dlp/extractor/bloomberg.py +++ b/yt_dlp/extractor/bloomberg.py @@ -55,7 +55,7 @@ class BloombergIE(InfoExtractor): title = re.sub(': Video$', '', self._og_search_title(webpage)) embed_info = self._download_json( - 'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id) + f'http://www.bloomberg.com/multimedia/api/embed?id={video_id}', video_id) formats = [] for stream in embed_info['streams']: stream_url = stream.get('url') diff --git a/yt_dlp/extractor/bokecc.py b/yt_dlp/extractor/bokecc.py index ca326f2..5fe937a 100644 --- a/yt_dlp/extractor/bokecc.py +++ b/yt_dlp/extractor/bokecc.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_parse_qs from ..utils import ExtractorError @@ -9,20 +10,18 @@ class BokeCCBaseIE(InfoExtractor): r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)', webpage, 'player params', group='query') - player_params = compat_parse_qs(player_params_str) + player_params = urllib.parse.parse_qs(player_params_str) info_xml = self._download_xml( - 'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % ( + 'http://p.bokecc.com/servlet/playinfo?uid={}&vid={}&m=1'.format( 
player_params['siteid'][0], player_params['vid'][0]), video_id) - formats = [{ + return [{ 'format_id': format_id, 'url': quality.find('./copy').attrib['playurl'], 'quality': int(quality.attrib['value']), } for quality in info_xml.findall('./video/quality')] - return formats - class BokeCCIE(BokeCCBaseIE): _IE_DESC = 'CC视频' @@ -38,11 +37,11 @@ class BokeCCIE(BokeCCBaseIE): }] def _real_extract(self, url): - qs = compat_parse_qs(self._match_valid_url(url).group('query')) + qs = urllib.parse.parse_qs(self._match_valid_url(url).group('query')) if not qs.get('vid') or not qs.get('uid'): raise ExtractorError('Invalid URL', expected=True) - video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0]) + video_id = '{}_{}'.format(qs['uid'][0], qs['vid'][0]) webpage = self._download_webpage(url, video_id) diff --git a/yt_dlp/extractor/bongacams.py b/yt_dlp/extractor/bongacams.py index bf95566..ab85477 100644 --- a/yt_dlp/extractor/bongacams.py +++ b/yt_dlp/extractor/bongacams.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, try_get, @@ -38,7 +37,7 @@ class BongaCamsIE(InfoExtractor): channel_id = mobj.group('id') amf = self._download_json( - 'https://%s/tools/amf.php' % host, channel_id, + f'https://{host}/tools/amf.php', channel_id, data=urlencode_postdata(( ('method', 'getRoomData'), ('args[]', channel_id), @@ -48,14 +47,14 @@ class BongaCamsIE(InfoExtractor): server_url = amf['localData']['videoServerUrl'] uploader_id = try_get( - amf, lambda x: x['performerData']['username'], compat_str) or channel_id + amf, lambda x: x['performerData']['username'], str) or channel_id uploader = try_get( - amf, lambda x: x['performerData']['displayName'], compat_str) + amf, lambda x: x['performerData']['displayName'], str) like_count = int_or_none(try_get( amf, lambda x: x['performerData']['loversCount'])) formats = self._extract_m3u8_formats( - '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id), + 
f'{server_url}/hls/stream_{uploader_id}/playlist.m3u8', channel_id, 'mp4', m3u8_id='hls', live=True) return { diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py index 2675866..f5b8196 100644 --- a/yt_dlp/extractor/bostonglobe.py +++ b/yt_dlp/extractor/bostonglobe.py @@ -57,8 +57,7 @@ class BostonGlobeIE(InfoExtractor): if video_id and account_id and player_id and embed: entries.append( - 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' - % (account_id, player_id, embed, video_id)) + f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}') if len(entries) == 0: return self.url_result(url, 'Generic') diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py index 008c011..3547ad9 100644 --- a/yt_dlp/extractor/box.py +++ b/yt_dlp/extractor/box.py @@ -72,20 +72,20 @@ class BoxIE(InfoExtractor): 'BoxApi': 'shared_link=' + shared_link, 'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats }, query={ - 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size' + 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size', }) title = f['name'] query = { 'access_token': access_token, - 'shared_link': shared_link + 'shared_link': shared_link, } formats = [] for url_tmpl in traverse_obj(f, ( 'representations', 'entries', lambda _, v: v['representation'] == 'dash', - 'content', 'url_template', {url_or_none} + 'content', 'url_template', {url_or_none}, )): manifest_url = update_url_query(url_tmpl.replace('{+asset_path}', 'manifest.mpd'), query) fmts = self._extract_mpd_formats(manifest_url, file_id) diff --git a/yt_dlp/extractor/boxcast.py b/yt_dlp/extractor/boxcast.py index da06cc3..efa6699 100644 --- a/yt_dlp/extractor/boxcast.py +++ b/yt_dlp/extractor/boxcast.py @@ -21,7 +21,7 @@ class BoxCastVideoIE(InfoExtractor): 'release_date': '20221210', 
'uploader_id': 're8w0v8hohhvpqtbskpe', 'uploader': 'Children\'s Health Defense', - } + }, }, { 'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad', 'info_dict': { @@ -30,8 +30,8 @@ class BoxCastVideoIE(InfoExtractor): 'uploader_id': 'vctwevwntun3o0ikq7af', 'uploader': 'Legacy Christian Church', 'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools', - 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg' - } + 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg', + }, }, { 'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev', 'info_dict': { @@ -44,7 +44,7 @@ class BoxCastVideoIE(InfoExtractor): 'uploader': 'Lighthouse Ministries International - Beltsville, Maryland', 'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340', 'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022', - } + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://childrenshealthdefense.eu/live-stream/', @@ -57,7 +57,7 @@ class BoxCastVideoIE(InfoExtractor): 'release_date': '20221210', 'uploader_id': 're8w0v8hohhvpqtbskpe', 'uploader': 'Children\'s Health Defense', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py index 6e1c63e..0568e06 100644 --- a/yt_dlp/extractor/br.py +++ b/yt_dlp/extractor/br.py @@ -61,7 +61,7 @@ class BRIE(InfoExtractor): 'title': 'Umweltbewusster Häuslebauer', 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2', 'duration': 116, - } + }, }, { 'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html', @@ -74,7 +74,7 @@ class BRIE(InfoExtractor): 'duration': 893, 'uploader': 'Eva Maria Steimle', 'upload_date': '20170208', - } + }, }, ] @@ -142,7 +142,7 @@ class BRIE(InfoExtractor): http_format_info = format_info.copy() http_format_info.update({ 'url': format_url, - 'format_id': 'http-%s' % asset_type, + 'format_id': 
f'http-{asset_type}', }) formats.append(http_format_info) server_prefix = xpath_text(asset, 'serverPrefix') @@ -151,7 +151,7 @@ class BRIE(InfoExtractor): rtmp_format_info.update({ 'url': server_prefix, 'play_path': xpath_text(asset, 'fileName'), - 'format_id': 'rtmp-%s' % asset_type, + 'format_id': f'rtmp-{asset_type}', }) formats.append(rtmp_format_info) return formats diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py index 04b1dd8..df10299 100644 --- a/yt_dlp/extractor/brainpop.py +++ b/yt_dlp/extractor/brainpop.py @@ -52,8 +52,8 @@ class BrainPOPBaseIE(InfoExtractor): '%s': {}, 'ad_%s': { 'format_note': 'Audio description', - 'source_preference': -2 - } + 'source_preference': -2, + }, } for additional_key_format, additional_key_fields in additional_key_formats.items(): for key_quality, key_index in enumerate(('high', 'low')): @@ -62,7 +62,7 @@ class BrainPOPBaseIE(InfoExtractor): formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, { 'quality': -1 - key_quality, **additional_key_fields, - **extra_fields + **extra_fields, })) return formats @@ -72,7 +72,7 @@ class BrainPOPBaseIE(InfoExtractor): data=json.dumps({'username': username, 'password': password}).encode(), headers={ 'Content-Type': 'application/json', - 'Referer': self._ORIGIN + 'Referer': self._ORIGIN, }, note='Logging in', errnote='Unable to log in', expected_status=400) status_code = int_or_none(login_res['status_code']) if status_code != 1505: @@ -131,12 +131,12 @@ class BrainPOPIE(BrainPOPBaseIE): formats, subtitles = [], {} formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', { 'language': movie_feature.get('language') or 'en', - 'language_preference': 10 + 'language_preference': 10, })) for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items(): 
formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', { 'language': lang, - 'language_preference': -10 + 'language_preference': -10, })) # TODO: Do localization fields also have subtitles? @@ -145,7 +145,7 @@ class BrainPOPIE(BrainPOPBaseIE): r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None) if lang and url: subtitles.setdefault(lang, []).append({ - 'url': urljoin(self._CDN_URL, url) + 'url': urljoin(self._CDN_URL, url), }) return { diff --git a/yt_dlp/extractor/bravotv.py b/yt_dlp/extractor/bravotv.py index 419fe8c..ec72f0d 100644 --- a/yt_dlp/extractor/bravotv.py +++ b/yt_dlp/extractor/bravotv.py @@ -185,5 +185,5 @@ class BravoTVIE(AdobePassIE): 'episode_number': ('episodeNumber', {int_or_none}), 'episode': 'episodeTitle', 'series': 'show', - })) + })), } diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py index b5abb7f..fedf477 100644 --- a/yt_dlp/extractor/breitbart.py +++ b/yt_dlp/extractor/breitbart.py @@ -13,7 +13,7 @@ class BreitBartIE(InfoExtractor): 'description': 'md5:bac35eb0256d1cb17f517f54c79404d5', 'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg', 'age_limit': 0, - } + }, }, { 'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/', 'only_matching': True, @@ -30,5 +30,5 @@ class BreitBartIE(InfoExtractor): 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), 'age_limit': self._rta_search(webpage), - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 4190e1a..2526f25 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -1,15 +1,12 @@ import base64 import re import struct +import urllib.parse import xml.etree.ElementTree from .adobepass import AdobePassIE from .common import InfoExtractor -from ..compat import ( - compat_etree_fromstring, - compat_parse_qs, - compat_urlparse, 
-) +from ..compat import compat_etree_fromstring from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -21,6 +18,7 @@ from ..utils import ( fix_xml_ampersands, float_or_none, int_or_none, + join_nonempty, js_to_json, mimetype2ext, parse_iso8601, @@ -142,7 +140,7 @@ class BrightcoveLegacyIE(InfoExtractor): # from http://www.un.org/chinese/News/story.asp?NewsID=27724 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350', 'only_matching': True, # Tested in GenericIE - } + }, ] _WEBPAGE_TESTS = [{ @@ -315,7 +313,7 @@ class BrightcoveLegacyIE(InfoExtractor): object_str = fix_xml_ampersands(object_str) try: - object_doc = compat_etree_fromstring(object_str.encode('utf-8')) + object_doc = compat_etree_fromstring(object_str.encode()) except xml.etree.ElementTree.ParseError: return @@ -323,7 +321,7 @@ class BrightcoveLegacyIE(InfoExtractor): if fv_el is not None: flashvars = dict( (k, v[0]) - for k, v in compat_parse_qs(fv_el.attrib['value']).items()) + for k, v in urllib.parse.parse_qs(fv_el.attrib['value']).items()) else: flashvars = {} @@ -340,32 +338,32 @@ class BrightcoveLegacyIE(InfoExtractor): params = {} - playerID = find_param('playerID') or find_param('playerId') - if playerID is None: + player_id = find_param('playerID') or find_param('playerId') + if player_id is None: raise ExtractorError('Cannot find player ID') - params['playerID'] = playerID + params['playerID'] = player_id - playerKey = find_param('playerKey') + player_key = find_param('playerKey') # Not all pages define this value - if playerKey is not None: - params['playerKey'] = playerKey + if player_key is not None: + params['playerKey'] = player_key # These fields hold the id of the video - videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') - if videoPlayer is not None: - if isinstance(videoPlayer, list): 
- videoPlayer = videoPlayer[0] - videoPlayer = videoPlayer.strip() + video_player = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') + if video_player is not None: + if isinstance(video_player, list): + video_player = video_player[0] + video_player = video_player.strip() # UUID is also possible for videoPlayer (e.g. # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd # or http://www8.hp.com/cn/zh/home.html) if not (re.match( r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$', - videoPlayer) or videoPlayer.startswith('ref:')): + video_player) or video_player.startswith('ref:')): return None - params['@videoPlayer'] = videoPlayer - linkBase = find_param('linkBaseURL') - if linkBase is not None: - params['linkBaseURL'] = linkBase + params['@videoPlayer'] = video_player + link_base = find_param('linkBaseURL') + if link_base is not None: + params['linkBaseURL'] = link_base return cls._make_brightcove_url(params) @classmethod @@ -389,7 +387,7 @@ class BrightcoveLegacyIE(InfoExtractor): @classmethod def _make_brightcove_url(cls, params): return update_url_query( - 'http://c.brightcove.com/services/viewer/htmlFederated', params) + 'https://c.brightcove.com/services/viewer/htmlFederated', params) @classmethod def _extract_brightcove_url(cls, webpage): @@ -448,13 +446,13 @@ class BrightcoveLegacyIE(InfoExtractor): url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) mobj = self._match_valid_url(url) query_str = mobj.group('query') - query = compat_urlparse.parse_qs(query_str) + query = urllib.parse.parse_qs(query_str) - videoPlayer = query.get('@videoPlayer') - if videoPlayer: + video_player = query.get('@videoPlayer') + if video_player: # We set the original url as the default 'Referer' header referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url) - video_id = videoPlayer[0] + video_id = video_player[0] if 'playerID' not in 
query: mobj = re.search(r'/bcpid(\d+)', url) if mobj is not None: @@ -473,7 +471,7 @@ class BrightcoveLegacyIE(InfoExtractor): if referer: headers['Referer'] = referer player_page = self._download_webpage( - 'http://link.brightcove.com/services/player/bcpid' + player_id[0], + 'https://link.brightcove.com/services/player/bcpid' + player_id[0], video_id, headers=headers, fatal=False) if player_page: player_key = self._search_regex( @@ -483,7 +481,7 @@ class BrightcoveLegacyIE(InfoExtractor): enc_pub_id = player_key.split(',')[1].replace('~', '=') publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] if publisher_id: - brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) + brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}' if referer: brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer}) return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) @@ -541,12 +539,7 @@ class BrightcoveNewBaseIE(AdobePassIE): }) def build_format_id(kind): - format_id = kind - if tbr: - format_id += '-%dk' % int(tbr) - if height: - format_id += '-%dp' % height - return format_id + return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p') if src or streaming_src: f.update({ @@ -654,7 +647,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # playlist stream 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', @@ -666,7 +659,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', 'only_matching': True, @@ -804,7 +797,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): # Look for iframe embeds [1] for _, 
url in re.findall( r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): - entries.append(url if url.startswith('http') else 'http:' + url) + entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url) # Look for <video> tags [2] and embed_in_page embeds [3] # [2] looks like: @@ -833,8 +826,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): player_id = player_id or attrs.get('data-player') or 'default' embed = embed or attrs.get('data-embed') or 'default' - bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % ( - account_id, player_id, embed, video_id) + bc_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}' # Some brightcove videos may be embedded with video tag only and # without script tag or any mentioning of brightcove at all. Such @@ -865,13 +857,13 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups() - policy_key_id = '%s_%s' % (account_id, player_id) + policy_key_id = f'{account_id}_{player_id}' policy_key = self.cache.load('brightcove', policy_key_id) policy_key_extracted = False store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x) def extract_policy_key(): - base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed) + base_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/' config = self._download_json( base_url + 'config.json', video_id, fatal=False) or {} policy_key = try_get( @@ -910,7 +902,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): if not policy_key: policy_key = extract_policy_key() policy_key_extracted = True - headers['Accept'] = 'application/json;pk=%s' % policy_key + headers['Accept'] = f'application/json;pk={policy_key}' try: json_data = self._download_json(api_url, video_id, headers=headers) break @@ -936,7 +928,7 @@ class 
BrightcoveNewIE(BrightcoveNewBaseIE): custom_fields['bcadobepassresourceid']) json_data = self._download_json( api_url, video_id, headers={ - 'Accept': 'application/json;pk=%s' % policy_key + 'Accept': f'application/json;pk={policy_key}', }, query={ 'tveToken': tve_token, }) diff --git a/yt_dlp/extractor/bundesliga.py b/yt_dlp/extractor/bundesliga.py index e76dd58..29f8f94 100644 --- a/yt_dlp/extractor/bundesliga.py +++ b/yt_dlp/extractor/bundesliga.py @@ -16,17 +16,17 @@ class BundesligaIE(InfoExtractor): 'upload_date': '20220928', 'duration': 146, 'timestamp': 1664366511, - 'description': 'md5:803d4411bd134140c774021dd4b7598b' - } + 'description': 'md5:803d4411bd134140c774021dd4b7598b', + }, }, { 'url': 'https://www.bundesliga.com/en/bundesliga/videos/latest-features/T8IKc8TX?vid=ROHjs06G', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.bundesliga.com/en/bundesliga/videos/goals?vid=mOG56vWA', - 'only_matching': True - } + 'only_matching': True, + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/businessinsider.py b/yt_dlp/extractor/businessinsider.py index 4b3f5e6..7cb9af6 100644 --- a/yt_dlp/extractor/businessinsider.py +++ b/yt_dlp/extractor/businessinsider.py @@ -10,7 +10,7 @@ class BusinessInsiderIE(InfoExtractor): 'info_dict': { 'id': 'cjGDb0X9', 'ext': 'mp4', - 'title': "Bananas give you more radiation exposure than living next to a nuclear power plant", + 'title': 'Bananas give you more radiation exposure than living next to a nuclear power plant', 'description': 'md5:0175a3baf200dd8fa658f94cade841b3', 'upload_date': '20160611', 'timestamp': 1465675620, @@ -41,5 +41,5 @@ class BusinessInsiderIE(InfoExtractor): r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'), webpage, 'jwplatform id') return self.url_result( - 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(), + f'jwplatform:{jwplatform_id}', ie=JWPlatformIE.ie_key(), video_id=video_id) diff --git a/yt_dlp/extractor/buzzfeed.py 
b/yt_dlp/extractor/buzzfeed.py index b30a3b7..9847095 100644 --- a/yt_dlp/extractor/buzzfeed.py +++ b/yt_dlp/extractor/buzzfeed.py @@ -23,8 +23,8 @@ class BuzzFeedIE(InfoExtractor): 'upload_date': '20141024', 'uploader_id': 'Buddhanz1', 'uploader': 'Angry Ram', - } - }] + }, + }], }, { 'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia', 'params': { @@ -45,7 +45,7 @@ class BuzzFeedIE(InfoExtractor): 'uploader_id': 'CindysMunchkin', 'uploader': 're:^Munchkin the', }, - }] + }], }, { 'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK', 'info_dict': { diff --git a/yt_dlp/extractor/byutv.py b/yt_dlp/extractor/byutv.py index ad35427..e9796f7 100644 --- a/yt_dlp/extractor/byutv.py +++ b/yt_dlp/extractor/byutv.py @@ -36,7 +36,7 @@ class BYUtvIE(InfoExtractor): 'duration': 11645, }, 'params': { - 'skip_download': True + 'skip_download': True, }, }, { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', diff --git a/yt_dlp/extractor/c56.py b/yt_dlp/extractor/c56.py index e4b1c9a..6264803 100644 --- a/yt_dlp/extractor/c56.py +++ b/yt_dlp/extractor/c56.py @@ -38,7 +38,7 @@ class C56IE(InfoExtractor): return self.url_result(sohu_video_info['url'], 'Sohu') page = self._download_json( - 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info') + f'http://vxml.56.com/json/{text_id}/', text_id, 'Downloading video info') info = page['info'] @@ -46,7 +46,7 @@ class C56IE(InfoExtractor): { 'format_id': f['type'], 'filesize': int(f['filesize']), - 'url': f['url'] + 'url': f['url'], } for f in info['rfiles'] ] diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py index c77179c..b7061a7 100644 --- a/yt_dlp/extractor/callin.py +++ b/yt_dlp/extractor/callin.py @@ -29,8 +29,8 @@ class CallinIE(InfoExtractor): 'series_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553', 'episode': 'The Title IX Regime and the Long March Through and 
Beyond the Institutions', 'episode_number': 1, - 'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd' - } + 'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd', + }, }, { 'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW', 'md5': '14ede27ee2c957b7e4db93140fc0745c', @@ -54,7 +54,7 @@ class CallinIE(InfoExtractor): 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png', 'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5', 'timestamp': 1662100688.005, - } + }, }, { 'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA', 'md5': '16f704ddbf82a27e3930533b12062f07', @@ -78,7 +78,7 @@ class CallinIE(InfoExtractor): 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png', 'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c', 'timestamp': 1661476708.282, - } + }, }] def try_get_user_name(self, d): @@ -94,7 +94,7 @@ class CallinIE(InfoExtractor): next_data = self._search_nextjs_data(webpage, display_id) episode = next_data['props']['pageProps']['episode'] - id = episode['id'] + video_id = episode['id'] title = episode.get('title') or self._generic_title('', webpage) url = episode['m3u8'] formats = self._extract_m3u8_formats(url, display_id, ext='ts') @@ -125,11 +125,11 @@ class CallinIE(InfoExtractor): episode_list = traverse_obj(show_json, ('pageProps', 'show', 'episodes')) or [] episode_number = next( - (len(episode_list) - i for (i, e) in enumerate(episode_list) if e.get('id') == id), + (len(episode_list) - i for i, e in enumerate(episode_list) if e.get('id') == video_id), None) return { - 'id': id, + 'id': video_id, '_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])], 'display_id': display_id, 'title': 
title, @@ -151,5 +151,5 @@ class CallinIE(InfoExtractor): 'series_id': show_id, 'episode': title, 'episode_number': episode_number, - 'episode_id': id + 'episode_id': video_id, } diff --git a/yt_dlp/extractor/caltrans.py b/yt_dlp/extractor/caltrans.py index f4a4a83..5513bb2 100644 --- a/yt_dlp/extractor/caltrans.py +++ b/yt_dlp/extractor/caltrans.py @@ -11,7 +11,7 @@ class CaltransIE(InfoExtractor): 'title': 'US-50 : Sacramento : Hwy 50 at 24th', 'live_status': 'is_live', 'thumbnail': 'https://cwwp2.dot.ca.gov/data/d3/cctv/image/hwy50at24th/hwy50at24th.jpg', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/cam4.py b/yt_dlp/extractor/cam4.py index 2650cc1..0d0dccb 100644 --- a/yt_dlp/extractor/cam4.py +++ b/yt_dlp/extractor/cam4.py @@ -12,12 +12,12 @@ class CAM4IE(InfoExtractor): 'age_limit': 18, 'live_status': 'is_live', 'thumbnail': 'https://snapshots.xcdnpro.com/thumbnails/foxynesss', - } + }, } def _real_extract(self, url): channel_id = self._match_id(url) - m3u8_playlist = self._download_json('https://www.cam4.com/rest/v1.0/profile/{}/streamInfo'.format(channel_id), channel_id).get('cdnURL') + m3u8_playlist = self._download_json(f'https://www.cam4.com/rest/v1.0/profile/{channel_id}/streamInfo', channel_id).get('cdnURL') formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True) diff --git a/yt_dlp/extractor/camdemy.py b/yt_dlp/extractor/camdemy.py index c7079e4..34dc095 100644 --- a/yt_dlp/extractor/camdemy.py +++ b/yt_dlp/extractor/camdemy.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_urlparse, -) from ..utils import ( clean_html, parse_duration, @@ -28,7 +25,7 @@ class CamdemyIE(InfoExtractor): 'duration': 1591, 'upload_date': '20130114', 'view_count': int, - } + }, }, { # With non-empty description # webpage returns "No permission or not login" @@ -42,7 +39,7 @@ class CamdemyIE(InfoExtractor): 
'description': 'md5:2a9f989c2b153a2342acee579c6e7db6', 'creator': 'evercam', 'duration': 318, - } + }, }, { # External source (YouTube) 'url': 'http://www.camdemy.com/media/14842', @@ -76,12 +73,12 @@ class CamdemyIE(InfoExtractor): title = oembed_obj['title'] thumb_url = oembed_obj['thumbnail_url'] - video_folder = compat_urlparse.urljoin(thumb_url, 'video/') + video_folder = urllib.parse.urljoin(thumb_url, 'video/') file_list_doc = self._download_xml( - compat_urlparse.urljoin(video_folder, 'fileList.xml'), + urllib.parse.urljoin(video_folder, 'fileList.xml'), video_id, 'Downloading filelist XML') file_name = file_list_doc.find('./video/item/fileName').text - video_url = compat_urlparse.urljoin(video_folder, file_name) + video_url = urllib.parse.urljoin(video_folder, file_name) # Some URLs return "No permission or not login" in a webpage despite being # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885) @@ -117,35 +114,35 @@ class CamdemyFolderIE(InfoExtractor): 'id': '450', 'title': '信號與系統 2012 & 2011 (Signals and Systems)', }, - 'playlist_mincount': 145 + 'playlist_mincount': 145, }, { # links without trailing slash # and multi-page 'url': 'http://www.camdemy.com/folder/853', 'info_dict': { 'id': '853', - 'title': '科學計算 - 使用 Matlab' + 'title': '科學計算 - 使用 Matlab', }, - 'playlist_mincount': 20 + 'playlist_mincount': 20, }, { # with displayMode parameter. 
For testing the codes to add parameters 'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg', 'info_dict': { 'id': '853', - 'title': '科學計算 - 使用 Matlab' + 'title': '科學計算 - 使用 Matlab', }, - 'playlist_mincount': 20 + 'playlist_mincount': 20, }] def _real_extract(self, url): folder_id = self._match_id(url) # Add displayMode=list so that all links are displayed in a single page - parsed_url = list(compat_urlparse.urlparse(url)) - query = dict(compat_urlparse.parse_qsl(parsed_url[4])) + parsed_url = list(urllib.parse.urlparse(url)) + query = dict(urllib.parse.parse_qsl(parsed_url[4])) query.update({'displayMode': 'list'}) - parsed_url[4] = compat_urllib_parse_urlencode(query) - final_url = compat_urlparse.urlunparse(parsed_url) + parsed_url[4] = urllib.parse.urlencode(query) + final_url = urllib.parse.urlunparse(parsed_url) page = self._download_webpage(final_url, folder_id) matches = re.findall(r"href='(/media/\d+/?)'", page) diff --git a/yt_dlp/extractor/camfm.py b/yt_dlp/extractor/camfm.py index 11dafa4..6036f13 100644 --- a/yt_dlp/extractor/camfm.py +++ b/yt_dlp/extractor/camfm.py @@ -37,7 +37,7 @@ class CamFMShowIE(InfoExtractor): 'thumbnail': urljoin('https://camfm.co.uk', self._search_regex( r'<img[^>]+class="thumb-expand"[^>]+src="([^"]+)"', page, 'thumbnail', fatal=False)), 'title': self._html_search_regex('<h1>([^<]+)</h1>', page, 'title', fatal=False), - 'description': clean_html(get_element_by_class('small-12 medium-8 cell', page)) + 'description': clean_html(get_element_by_class('small-12 medium-8 cell', page)), } @@ -56,7 +56,7 @@ class CamFMEpisodeIE(InfoExtractor): 'series': 'AITAA: Am I the Agony Aunt?', 'thumbnail': 'md5:5980a831360d0744c3764551be3d09c1', 'categories': ['Entertainment'], - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/cammodels.py b/yt_dlp/extractor/cammodels.py index 135b315..7388cfb 100644 --- a/yt_dlp/extractor/cammodels.py +++ b/yt_dlp/extractor/cammodels.py @@ -7,14 +7,14 @@ class 
CamModelsIE(InfoExtractor): _TESTS = [{ 'url': 'https://www.cammodels.com/cam/AutumnKnight/', 'only_matching': True, - 'age_limit': 18 + 'age_limit': 18, }] def _real_extract(self, url): user_id = self._match_id(url) manifest = self._download_json( - 'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id) + f'https://manifest-server.naiadsystems.com/live/s:{user_id}.json', user_id) formats = [] thumbnails = [] @@ -36,7 +36,7 @@ class CamModelsIE(InfoExtractor): format_id_list = [format_id] height = int_or_none(media.get('videoHeight')) if height is not None: - format_id_list.append('%dp' % height) + format_id_list.append(f'{height}p') f = { 'url': media_url, 'format_id': '-'.join(format_id_list), @@ -73,5 +73,5 @@ class CamModelsIE(InfoExtractor): 'thumbnails': thumbnails, 'is_live': True, 'formats': formats, - 'age_limit': 18 + 'age_limit': 18, } diff --git a/yt_dlp/extractor/camtasia.py b/yt_dlp/extractor/camtasia.py index 70ab6c6..3266431 100644 --- a/yt_dlp/extractor/camtasia.py +++ b/yt_dlp/extractor/camtasia.py @@ -17,7 +17,7 @@ class CamtasiaEmbedIE(InfoExtractor): 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1', 'ext': 'flv', 'duration': 2235.90, - } + }, }, { 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63', 'info_dict': { @@ -25,12 +25,12 @@ class CamtasiaEmbedIE(InfoExtractor): 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip', 'ext': 'flv', 'duration': 2235.93, - } + }, }], 'info_dict': { 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final', }, - 'skip': 'webpage dead' + 'skip': 'webpage dead', }, ] diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py index 745e695..3a0df95 100644 --- a/yt_dlp/extractor/canalalpha.py +++ b/yt_dlp/extractor/canalalpha.py @@ -21,7 +21,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20211028', 'duration': 1125, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 
'https://www.canalalpha.ch/play/le-journal/topic/24512/la-poste-fait-de-neuchatel-un-pole-cryptographique', 'info_dict': { @@ -33,7 +33,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20211028', 'duration': 138, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.canalalpha.ch/play/eureka/episode/24484/ces-innovations-qui-veulent-rendre-lagriculture-plus-durable', 'info_dict': { @@ -45,7 +45,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20211026', 'duration': 360, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.canalalpha.ch/play/avec-le-temps/episode/23516/redonner-de-leclat-grace-au-polissage', 'info_dict': { @@ -57,7 +57,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20210726', 'duration': 360, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura', 'info_dict': { diff --git a/yt_dlp/extractor/canalc2.py b/yt_dlp/extractor/canalc2.py index 597cb2a..c725545 100644 --- a/yt_dlp/extractor/canalc2.py +++ b/yt_dlp/extractor/canalc2.py @@ -26,7 +26,7 @@ class Canalc2IE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.canalc2.tv/video/%s' % video_id, video_id) + f'http://www.canalc2.tv/video/{video_id}', video_id) title = self._html_search_regex( r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>', diff --git a/yt_dlp/extractor/canalplus.py b/yt_dlp/extractor/canalplus.py index 3ff5c3f..728b7a0 100644 --- a/yt_dlp/extractor/canalplus.py +++ b/yt_dlp/extractor/canalplus.py @@ -53,7 +53,7 @@ class CanalplusIE(InfoExtractor): video_data = self._download_json(info_url, video_id, 'Downloading video JSON') if isinstance(video_data, list): - video_data = [video for video in video_data if video.get('ID') == video_id][0] + video_data = next(video for video in video_data 
if video.get('ID') == video_id) media = video_data['MEDIA'] infos = video_data['INFOS'] @@ -97,8 +97,7 @@ class CanalplusIE(InfoExtractor): return { 'id': video_id, 'display_id': display_id, - 'title': '%s - %s' % (titrage['TITRE'], - titrage['SOUS_TITRE']), + 'title': '{} - {}'.format(titrage['TITRE'], titrage['SOUS_TITRE']), 'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')), 'thumbnails': thumbnails, 'description': infos.get('DESCRIPTION'), diff --git a/yt_dlp/extractor/caracoltv.py b/yt_dlp/extractor/caracoltv.py index 79f7752..493ffda 100644 --- a/yt_dlp/extractor/caracoltv.py +++ b/yt_dlp/extractor/caracoltv.py @@ -78,13 +78,13 @@ class CaracolTvPlayIE(InfoExtractor): 'device_data': { 'device_id': str(uuid.uuid4()), 'device_token': '', - 'device_type': 'web' + 'device_type': 'web', }, 'login_data': { 'enabled': True, 'email': email, 'password': password, - } + }, }).encode())['user_token'] def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None): diff --git a/yt_dlp/extractor/cartoonnetwork.py b/yt_dlp/extractor/cartoonnetwork.py index 4dd7ac4..1749a00 100644 --- a/yt_dlp/extractor/cartoonnetwork.py +++ b/yt_dlp/extractor/cartoonnetwork.py @@ -27,7 +27,7 @@ class CartoonNetworkIE(TurnerBaseIE): if content_re: metadata_re = r'|video_metadata\.content_' + content_re return self._search_regex( - r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re), + rf'(?:_cnglobal\.currentVideo\.{global_re}{metadata_re})\s*=\s*"({value_re})";', webpage, name, fatal=fatal) media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index a418026..1522b08 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -6,9 +6,6 @@ import urllib.parse import xml.etree.ElementTree from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( ExtractorError, int_or_none, @@ -99,7 
+96,7 @@ class CBCIE(InfoExtractor): # multiple CBC.APP.Caffeine.initInstance(...) 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', 'info_dict': { - 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME + 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME: actual title includes " | CBC News" 'id': 'dog-indoor-exercise-winter-1.3928238', 'description': 'md5:c18552e41726ee95bd75210d1ca9194c', }, @@ -108,7 +105,7 @@ class CBCIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url) + return False if CBCPlayerIE.suitable(url) else super().suitable(url) def _extract_player_init(self, player_init, display_id): player_info = self._parse_json(player_init, display_id, js_to_json) @@ -116,15 +113,15 @@ class CBCIE(InfoExtractor): if not media_id: clip_id = player_info['clipId'] feed = self._download_json( - 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, + f'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={{:mpsReleases}}{{{clip_id}}}', clip_id, fatal=False) if feed: - media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) + media_id = try_get(feed, lambda x: x['entries'][0]['guid'], str) if not media_id: media_id = self._download_json( 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, clip_id)['entries'][0]['id'].split('/')[-1] - return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + return self.url_result(f'cbcplayer:{media_id}', 'CBCPlayer', media_id) def _real_extract(self, url): display_id = self._match_id(url) @@ -142,7 +139,7 @@ class CBCIE(InfoExtractor): r'guid["\']\s*:\s*["\'](\d+)'): media_ids.extend(re.findall(media_id_re, webpage)) entries.extend([ - self.url_result('cbcplayer:%s' % media_id, 
'CBCPlayer', media_id) + self.url_result(f'cbcplayer:{media_id}', 'CBCPlayer', media_id) for media_id in orderedSet(media_ids)]) return self.playlist_result( entries, display_id, strip_or_none(title), @@ -322,11 +319,11 @@ class CBCPlayerIE(InfoExtractor): '_type': 'url_transparent', 'ie_key': 'ThePlatform', 'url': smuggle_url( - 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, { - 'force_smil_url': True + f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{video_id}?mbr=true&formats=MPEG4,FLV,MP3', { + 'force_smil_url': True, }), 'id': video_id, - '_format_sort_fields': ('res', 'proto') # Prioritize direct http formats over HLS + '_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS } @@ -338,13 +335,13 @@ class CBCPlayerPlaylistIE(InfoExtractor): 'playlist_mincount': 25, 'info_dict': { 'id': 'news/tv shows/the national/latest broadcast', - } + }, }, { 'url': 'https://www.cbc.ca/player/news/Canada/North', 'playlist_mincount': 25, 'info_dict': { 'id': 'news/canada/north', - } + }, }] def _real_extract(self, url): @@ -355,7 +352,7 @@ class CBCPlayerPlaylistIE(InfoExtractor): def entries(): for video_id in traverse_obj(json_content, ( - 'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id' + 'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id', )): yield self.url_result(f'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE) @@ -453,15 +450,13 @@ class CBCGemIE(InfoExtractor): # JWT is decoded here and 'exp' field is extracted # It is a Unix timestamp for when the token expires b64_data = self._claims_token.split('.')[1] - data = base64.urlsafe_b64decode(b64_data + "==") + data = base64.urlsafe_b64decode(b64_data + '==') return json.loads(data)['exp'] def claims_token_expired(self): exp = self._get_claims_token_expiry() - if exp - time.time() < 10: - # It will expire in less than 10 seconds, or has 
already expired - return True - return False + # It will expire in less than 10 seconds, or has already expired + return exp - time.time() < 10 def claims_token_valid(self): return self._claims_token is not None and not self.claims_token_expired() @@ -535,17 +530,17 @@ class CBCGemIE(InfoExtractor): self._remove_duplicate_formats(formats) formats.extend(self._find_secret_formats(formats, video_id)) - for format in formats: - if format.get('vcodec') == 'none': - if format.get('ext') is None: - format['ext'] = 'm4a' - if format.get('acodec') is None: - format['acodec'] = 'mp4a.40.2' + for fmt in formats: + if fmt.get('vcodec') == 'none': + if fmt.get('ext') is None: + fmt['ext'] = 'm4a' + if fmt.get('acodec') is None: + fmt['acodec'] = 'mp4a.40.2' # Put described audio at the beginning of the list, so that it # isn't chosen by default, as most people won't want it. - if 'descriptive' in format['format_id'].lower(): - format['preference'] = -2 + if 'descriptive' in fmt['format_id'].lower(): + fmt['preference'] = -2 return { 'id': video_id, @@ -670,7 +665,7 @@ class CBCGemLiveIE(InfoExtractor): 'title': r're:^Ottawa [0-9\-: ]+', 'description': 'The live TV channel and local programming from Ottawa', 'live_status': 'is_live', - 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*' + 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*', }, 'params': {'skip_download': True}, 'skip': 'Live might have ended', @@ -690,7 +685,7 @@ class CBCGemLiveIE(InfoExtractor): }, 'params': {'skip_download': True}, 'skip': 'Live might have ended', - } + }, ] def _real_extract(self, url): @@ -729,5 +724,5 @@ class CBCGemLiveIE(InfoExtractor): 'description': 'description', 'thumbnail': ('images', 'card', 'url'), 'timestamp': ('airDate', {parse_iso8601}), - }) + }), } diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index aca9782..e825588 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -31,7 +31,7 @@ class CBSBaseIE(ThePlatformFeedIE): 
# XXX: Do not subclass from concrete IE return subtitles def _extract_common_video_info(self, content_id, asset_types, mpx_acc, extra_info): - tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id) + tp_path = f'dJ5BDC/media/guid/{mpx_acc}/{content_id}' tp_release_url = f'https://link.theplatform.com/s/{tp_path}' info = self._extract_theplatform_metadata(tp_path, content_id) @@ -41,7 +41,7 @@ class CBSBaseIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE try: tp_formats, tp_subtitles = self._extract_theplatform_smil( update_url_query(tp_release_url, query), content_id, - 'Downloading %s SMIL data' % asset_type) + f'Downloading {asset_type} SMIL data') except ExtractorError as e: last_e = e if asset_type != 'fallback': @@ -50,7 +50,7 @@ class CBSBaseIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE try: tp_formats, tp_subtitles = self._extract_theplatform_smil( update_url_query(tp_release_url, query), content_id, - 'Downloading %s SMIL data, trying again with another format' % asset_type) + f'Downloading {asset_type} SMIL data, trying again with another format') except ExtractorError as e: last_e = e continue diff --git a/yt_dlp/extractor/cbsnews.py b/yt_dlp/extractor/cbsnews.py index 5a8ebb8..972e111 100644 --- a/yt_dlp/extractor/cbsnews.py +++ b/yt_dlp/extractor/cbsnews.py @@ -1,6 +1,5 @@ import base64 import re -import urllib.error import urllib.parse import zlib diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py index ca6b82c..1d781cc 100644 --- a/yt_dlp/extractor/ccc.py +++ b/yt_dlp/extractor/ccc.py @@ -25,7 +25,7 @@ class CCCIE(InfoExtractor): 'timestamp': 1388188800, 'duration': 3710, 'tags': list, - } + }, }, { 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', 'only_matching': True, @@ -35,7 +35,7 @@ class CCCIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) event_id = self._search_regex(r"data-id='(\d+)'", webpage, 'event id') - event_data = 
self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id) + event_data = self._download_json(f'https://media.ccc.de/public/events/{event_id}', event_id) formats = [] for recording in event_data.get('recordings', []): @@ -96,7 +96,7 @@ class CCCPlaylistIE(InfoExtractor): 'title': 'Datenspuren 2023', 'id': 'DS2023', }, - 'playlist_count': 37 + 'playlist_count': 37, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py index ab840f3..ffe4b49 100644 --- a/yt_dlp/extractor/ccma.py +++ b/yt_dlp/extractor/ccma.py @@ -24,7 +24,7 @@ class CCMAIE(InfoExtractor): 'timestamp': 1478608140, 'upload_date': '20161108', 'age_limit': 0, - } + }, }, { 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', 'md5': 'fa3e38f269329a278271276330261425', @@ -37,7 +37,7 @@ class CCMAIE(InfoExtractor): 'timestamp': 1494622500, 'vcodec': 'none', 'categories': ['Esports'], - } + }, }, { 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/', 'md5': 'b43c3d3486f430f3032b5b160d80cbc3', @@ -51,7 +51,7 @@ class CCMAIE(InfoExtractor): 'subtitles': 'mincount:4', 'age_limit': 16, 'series': 'Crims', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/cctv.py b/yt_dlp/extractor/cctv.py index 8552ee5..18c080d 100644 --- a/yt_dlp/extractor/cctv.py +++ b/yt_dlp/extractor/cctv.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( float_or_none, try_get, @@ -167,17 +166,17 @@ class CCTVIE(InfoExtractor): if isinstance(video, dict): for quality, chapters_key in enumerate(('lowChapters', 'chapters')): video_url = try_get( - video, lambda x: x[chapters_key][0]['url'], compat_str) + video, lambda x: x[chapters_key][0]['url'], str) if video_url: formats.append({ 'url': video_url, 'format_id': 'http', 'quality': quality, # Sample clip - 'preference': -10 + 
'preference': -10, }) - hls_url = try_get(data, lambda x: x['hls_url'], compat_str) + hls_url = try_get(data, lambda x: x['hls_url'], str) if hls_url: hls_url = re.sub(r'maxbr=\d+&?', '', hls_url) formats.extend(self._extract_m3u8_formats( diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 0a5a524..62ee8b1 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -6,9 +6,10 @@ import hmac import json import random import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_ord, compat_urllib_parse_unquote +from ..compat import compat_ord from ..utils import ( ExtractorError, float_or_none, @@ -51,7 +52,7 @@ class CDAIE(InfoExtractor): 'age_limit': 0, 'upload_date': '20160221', 'timestamp': 1456078244, - } + }, }, { 'url': 'http://www.cda.pl/video/57413289', 'md5': 'a88828770a8310fc00be6c95faf7f4d5', @@ -67,7 +68,7 @@ class CDAIE(InfoExtractor): 'age_limit': 0, 'upload_date': '20160220', 'timestamp': 1455968218, - } + }, }, { # Age-restricted with vfilm redirection 'url': 'https://www.cda.pl/video/8753244c4', @@ -85,7 +86,7 @@ class CDAIE(InfoExtractor): 'average_rating': float, 'timestamp': 1633888264, 'upload_date': '20211010', - } + }, }, { # Age-restricted without vfilm redirection 'url': 'https://www.cda.pl/video/17028157b8', @@ -103,7 +104,7 @@ class CDAIE(InfoExtractor): 'average_rating': float, 'timestamp': 1699705901, 'upload_date': '20231111', - } + }, }, { 'url': 'http://ebd.cda.pl/0x0/5749950c', 'only_matching': True, @@ -263,7 +264,7 @@ class CDAIE(InfoExtractor): def decrypt_file(a): for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'): a = a.replace(p, '') - a = compat_urllib_parse_unquote(a) + a = urllib.parse.unquote(a) b = [] for c in a: f = compat_ord(c) @@ -280,16 +281,16 @@ class CDAIE(InfoExtractor): def extract_format(page, version): json_str = self._html_search_regex( r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page, - '%s player_json' % version, fatal=False, 
group='player_data') + f'{version} player_json', fatal=False, group='player_data') if not json_str: return player_data = self._parse_json( - json_str, '%s player_data' % version, fatal=False) + json_str, f'{version} player_data', fatal=False) if not player_data: return video = player_data.get('video') if not video or 'file' not in video: - self.report_warning('Unable to extract %s version information' % version) + self.report_warning(f'Unable to extract {version} version information') return if video['file'].startswith('uggc'): video['file'] = codecs.decode(video['file'], 'rot_13') @@ -310,11 +311,11 @@ class CDAIE(InfoExtractor): continue data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2, 'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]} - data = json.dumps(data).encode('utf-8') + data = json.dumps(data).encode() video_url = self._download_json( f'https://www.cda.pl/video/{video_id}', video_id, headers={ 'Content-Type': 'application/json', - 'X-Requested-With': 'XMLHttpRequest' + 'X-Requested-With': 'XMLHttpRequest', }, data=data, note=f'Fetching {quality} url', errnote=f'Failed to fetch {quality} url', fatal=False) if try_get(video_url, lambda x: x['result']['status']) == 'ok': @@ -322,7 +323,7 @@ class CDAIE(InfoExtractor): info_dict['formats'].append({ 'url': video_url, 'format_id': quality, - 'height': int_or_none(quality[:-1]) + 'height': int_or_none(quality[:-1]), }) if not info_dict['duration']: @@ -340,11 +341,11 @@ class CDAIE(InfoExtractor): webpage = handler( urljoin(self._BASE_URL, href), video_id, - 'Downloading %s version information' % resolution, fatal=False) + f'Downloading {resolution} version information', fatal=False) if not webpage: # Manually report warning because empty page is returned when # invalid version is requested. 
- self.report_warning('Unable to download %s version information' % resolution) + self.report_warning(f'Unable to download {resolution} version information') continue extract_format(webpage, resolution) diff --git a/yt_dlp/extractor/cellebrite.py b/yt_dlp/extractor/cellebrite.py index 9896a31..e90365a 100644 --- a/yt_dlp/extractor/cellebrite.py +++ b/yt_dlp/extractor/cellebrite.py @@ -14,7 +14,7 @@ class CellebriteIE(InfoExtractor): 'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED', 'duration': 455, 'tags': [], - } + }, }, { 'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/', 'info_dict': { @@ -25,7 +25,7 @@ class CellebriteIE(InfoExtractor): 'description': 'md5:e9a3d124c7287b0b07bad2547061cacf', 'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png', 'title': 'Android Extractions Explained', - } + }, }] def _get_formats_and_subtitles(self, json_data, display_id): diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index 5d63357..c323985 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse from ..networking import Request from ..utils import ( ExtractorError, @@ -97,11 +97,11 @@ class CeskaTelevizeIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) webpage, urlh = self._download_webpage_handle(url, playlist_id) - parsed_url = compat_urllib_parse_urlparse(urlh.url) + parsed_url = urllib.parse.urlparse(urlh.url) site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') playlist_title = self._og_search_title(webpage, default=None) if site_name and playlist_title: - playlist_title = 
re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0] + playlist_title = re.split(rf'\s*[—|]\s*{site_name}', playlist_title, maxsplit=1)[0] playlist_description = self._og_search_description(webpage, default=None) if playlist_description: playlist_description = playlist_description.replace('\xa0', ' ') @@ -122,15 +122,15 @@ class CeskaTelevizeIE(InfoExtractor): iframe_hash = self._download_webpage( 'https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id, note='Getting IFRAME hash') - query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, } + query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec} webpage = self._download_webpage( 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id, note='Downloading player', query=query) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' - if '%s</p>' % NOT_AVAILABLE_STRING in webpage: + if f'{NOT_AVAILABLE_STRING}</p>' in webpage: self.raise_geo_restricted(NOT_AVAILABLE_STRING) - if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )): + if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen')): raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) type_ = None @@ -183,7 +183,7 @@ class CeskaTelevizeIE(InfoExtractor): if playlist_url == 'error_region': raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) - req = Request(compat_urllib_parse_unquote(playlist_url)) + req = Request(urllib.parse.unquote(playlist_url)) req.headers['Referer'] = url playlist = self._download_json(req, playlist_id, fatal=False) @@ -203,11 +203,11 @@ class CeskaTelevizeIE(InfoExtractor): if 'playerType=flash' in stream_url: stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', 'm3u8_native', - m3u8_id='hls-%s' % format_id, fatal=False) + 
m3u8_id=f'hls-{format_id}', fatal=False) else: stream_formats = self._extract_mpd_formats( stream_url, playlist_id, - mpd_id='dash-%s' % format_id, fatal=False) + mpd_id=f'dash-{format_id}', fatal=False) if 'drmOnly=true' in stream_url: for f in stream_formats: f['has_drm'] = True @@ -236,7 +236,7 @@ class CeskaTelevizeIE(InfoExtractor): if playlist_len == 1: final_title = playlist_title or title else: - final_title = '%s (%s)' % (playlist_title, title) + final_title = f'{playlist_title} ({title})' entries.append({ 'id': item_id, @@ -261,7 +261,7 @@ class CeskaTelevizeIE(InfoExtractor): 'cs': [{ 'ext': 'srt', 'data': srt_subs, - }] + }], } @staticmethod @@ -282,7 +282,7 @@ class CeskaTelevizeIE(InfoExtractor): if m: yield m.group(1) start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:]) - yield '{0} --> {1}'.format(start, stop) + yield f'{start} --> {stop}' else: yield line diff --git a/yt_dlp/extractor/cgtn.py b/yt_dlp/extractor/cgtn.py index 5d9d9bc..b9757e0 100644 --- a/yt_dlp/extractor/cgtn.py +++ b/yt_dlp/extractor/cgtn.py @@ -20,8 +20,8 @@ class CGTNIE(InfoExtractor): 'categories': ['Video'], }, 'params': { - 'skip_download': True - } + 'skip_download': True, + }, }, { 'url': 'https://news.cgtn.com/news/2021-06-06/China-Indonesia-vow-to-further-deepen-maritime-cooperation-10REvJCewCY/index.html', 'info_dict': { @@ -36,9 +36,9 @@ class CGTNIE(InfoExtractor): 'upload_date': '20210606', }, 'params': { - 'skip_download': False - } - } + 'skip_download': False, + }, + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index 99dfcfd..b49f741 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -37,7 +37,7 @@ class ChaturbateIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://chaturbate.com/%s/' % video_id, video_id, + f'https://chaturbate.com/{video_id}/', video_id, headers=self.geo_verification_headers()) found_m3u8_urls = 
[] @@ -85,7 +85,7 @@ class ChaturbateIE(InfoExtractor): formats = [] for m3u8_url in m3u8_urls: for known_id in ('fast', 'slow'): - if '_%s' % known_id in m3u8_url: + if f'_{known_id}' in m3u8_url: m3u8_id = known_id break else: @@ -99,7 +99,7 @@ class ChaturbateIE(InfoExtractor): return { 'id': video_id, 'title': video_id, - 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id, + 'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg', 'age_limit': self._rta_search(webpage), 'is_live': True, 'formats': formats, diff --git a/yt_dlp/extractor/cinemax.py b/yt_dlp/extractor/cinemax.py index 706ec85..66831ef 100644 --- a/yt_dlp/extractor/cinemax.py +++ b/yt_dlp/extractor/cinemax.py @@ -20,6 +20,6 @@ class CinemaxIE(HBOBaseIE): def _real_extract(self, url): path, video_id = self._match_valid_url(url).groups() - info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id) + info = self._extract_info(f'https://www.cinemax.com/{path}.xml', video_id) info['id'] = video_id return info diff --git a/yt_dlp/extractor/cinetecamilano.py b/yt_dlp/extractor/cinetecamilano.py index 745b71f..834890d 100644 --- a/yt_dlp/extractor/cinetecamilano.py +++ b/yt_dlp/extractor/cinetecamilano.py @@ -27,8 +27,8 @@ class CinetecaMilanoIE(InfoExtractor): 'modified_date': '20200520', 'duration': 3139, 'release_timestamp': 1643446208, - 'modified_timestamp': int - } + 'modified_timestamp': int, + }, }] def _real_extract(self, url): @@ -38,7 +38,7 @@ class CinetecaMilanoIE(InfoExtractor): f'https://www.cinetecamilano.it/api/catalogo/{video_id}/?', video_id, headers={ 'Referer': url, - 'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or '' + 'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or '', }) except ExtractorError as e: if ((isinstance(e.cause, HTTPError) and e.cause.status == 500) @@ -58,5 
+58,5 @@ class CinetecaMilanoIE(InfoExtractor): 'modified_timestamp': parse_iso8601(archive.get('created_at'), delimiter=' '), 'thumbnail': urljoin(url, try_get(archive, lambda x: x['thumb']['src'].replace('/public/', '/storage/'))), 'formats': self._extract_m3u8_formats( - urljoin(url, traverse_obj(archive, ('drm', 'hls'))), video_id, 'mp4') + urljoin(url, traverse_obj(archive, ('drm', 'hls'))), video_id, 'mp4'), } diff --git a/yt_dlp/extractor/cineverse.py b/yt_dlp/extractor/cineverse.py index 4405297..c8c6c48 100644 --- a/yt_dlp/extractor/cineverse.py +++ b/yt_dlp/extractor/cineverse.py @@ -13,7 +13,7 @@ from ..utils import ( class CineverseBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://www\.(?P<host>%s)' % '|'.join(map(re.escape, ( + _VALID_URL_BASE = r'https?://www\.(?P<host>{})'.format('|'.join(map(re.escape, ( 'cineverse.com', 'asiancrush.com', 'dovechannel.com', @@ -21,7 +21,7 @@ class CineverseBaseIE(InfoExtractor): 'midnightpulp.com', 'fandor.com', 'retrocrush.tv', - ))) + )))) class CineverseIE(CineverseBaseIE): @@ -38,7 +38,7 @@ class CineverseIE(CineverseBaseIE): 'duration': 5811.597, 'description': 'md5:892fd62a05611d394141e8394ace0bc6', 'age_limit': 13, - } + }, }, { 'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! 
Crystal Bowie', 'skip': 'geo-blocked', @@ -55,7 +55,7 @@ class CineverseIE(CineverseBaseIE): 'duration': 1485.067, 'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.', 'series': 'Space Adventure COBRA (Original Japanese)', - } + }, }] def _real_extract(self, url): @@ -104,7 +104,7 @@ class CineverseDetailsIE(CineverseBaseIE): 'info_dict': { 'title': 'Space Adventure COBRA (Original Japanese)', 'id': '1000000023012', - } + }, }, { 'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel', 'info_dict': { diff --git a/yt_dlp/extractor/ciscolive.py b/yt_dlp/extractor/ciscolive.py index 0668578..1584ca6 100644 --- a/yt_dlp/extractor/ciscolive.py +++ b/yt_dlp/extractor/ciscolive.py @@ -105,7 +105,7 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE): @classmethod def suitable(cls, url): - return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url) + return False if CiscoLiveSessionIE.suitable(url) else super().suitable(url) @staticmethod def _check_bc_id_exists(rf_item): @@ -117,7 +117,7 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE): for page_num in itertools.count(1): results = self._call_api( 'search', None, query, url, - 'Downloading search JSON page %d' % page_num) + f'Downloading search JSON page {page_num}') sl = try_get(results, lambda x: x['sectionList'][0], dict) if sl: results = sl diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py index 85585df..d39347c 100644 --- a/yt_dlp/extractor/ciscowebex.py +++ b/yt_dlp/extractor/ciscowebex.py @@ -46,7 +46,7 @@ class CiscoWebexIE(InfoExtractor): headers['accessPwd'] = password stream, urlh = self._download_json_handle( - 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id), + f'https://{subdomain}.webex.com/webappng/api/v1/recordings/{video_id}/stream', video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429)) if urlh.status == 403: @@ -101,6 +101,6 @@ class 
CiscoWebexIE(InfoExtractor): 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), 'timestamp': unified_timestamp(stream.get('createTime')), 'duration': int_or_none(stream.get('duration'), 1000), - 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id), + 'webpage_url': f'https://{subdomain}.webex.com/recordingservice/sites/{siteurl}/recording/playback/{video_id}', 'formats': formats, } diff --git a/yt_dlp/extractor/cjsw.py b/yt_dlp/extractor/cjsw.py index c37a3b8..b80236a 100644 --- a/yt_dlp/extractor/cjsw.py +++ b/yt_dlp/extractor/cjsw.py @@ -27,7 +27,7 @@ class CJSWIE(InfoExtractor): def _real_extract(self, url): mobj = self._match_valid_url(url) program, episode_id = mobj.group('program', 'id') - audio_id = '%s/%s' % (program, episode_id) + audio_id = f'{program}/{episode_id}' webpage = self._download_webpage(url, episode_id) diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py index 67b56e0..393f217 100644 --- a/yt_dlp/extractor/clippit.py +++ b/yt_dlp/extractor/clippit.py @@ -23,7 +23,7 @@ class ClippitIE(InfoExtractor): 'upload_date': '20160826', 'description': 'BattleBots | ABC', 'thumbnail': r're:^https?://.*\.jpg$', - } + }, } def _real_extract(self, url): @@ -36,7 +36,7 @@ class ClippitIE(InfoExtractor): quality = qualities(FORMATS) formats = [] for format_id in FORMATS: - url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id, + url = self._html_search_regex(rf'data-{format_id}-file="(.+?)"', webpage, 'url', fatal=False) if not url: continue diff --git a/yt_dlp/extractor/cliprs.py b/yt_dlp/extractor/cliprs.py index c2add02..42f78ca 100644 --- a/yt_dlp/extractor/cliprs.py +++ b/yt_dlp/extractor/cliprs.py @@ -15,7 +15,7 @@ class ClipRsIE(OnetBaseIE): 'duration': 229, 'timestamp': 1459850243, 'upload_date': '20160405', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/closertotruth.py b/yt_dlp/extractor/closertotruth.py index 
1f9a5f6..77469ed 100644 --- a/yt_dlp/extractor/closertotruth.py +++ b/yt_dlp/extractor/closertotruth.py @@ -15,7 +15,7 @@ class CloserToTruthIE(InfoExtractor): 'title': 'Solutions to the Mind-Body Problem?', 'upload_date': '20140221', 'timestamp': 1392956007, - 'uploader_id': 'CTTXML' + 'uploader_id': 'CTTXML', }, 'params': { 'skip_download': True, @@ -29,7 +29,7 @@ class CloserToTruthIE(InfoExtractor): 'title': 'How do Brains Work?', 'upload_date': '20140221', 'timestamp': 1392956024, - 'uploader_id': 'CTTXML' + 'uploader_id': 'CTTXML', }, 'params': { 'skip_download': True, @@ -69,7 +69,7 @@ class CloserToTruthIE(InfoExtractor): entry_ids.add(entry_id) entries.append({ '_type': 'url_transparent', - 'url': 'kaltura:%s:%s' % (partner_id, entry_id), + 'url': f'kaltura:{partner_id}:{entry_id}', 'ie_key': 'Kaltura', 'title': mobj.group('title'), }) @@ -83,7 +83,7 @@ class CloserToTruthIE(InfoExtractor): return { '_type': 'url_transparent', 'display_id': display_id, - 'url': 'kaltura:%s:%s' % (partner_id, entry_id), + 'url': f'kaltura:{partner_id}:{entry_id}', 'ie_key': 'Kaltura', - 'title': title + 'title': title, } diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py index a812c24..8a40946 100644 --- a/yt_dlp/extractor/cloudflarestream.py +++ b/yt_dlp/extractor/cloudflarestream.py @@ -6,11 +6,11 @@ from .common import InfoExtractor class CloudflareStreamIE(InfoExtractor): _SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?' 
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' - _EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo=' - _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+' + _EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video=' + _ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+' _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})' _EMBED_REGEX = [ - rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1', + rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1', rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})', ] _TESTS = [{ @@ -25,6 +25,14 @@ class CloudflareStreamIE(InfoExtractor): 'skip_download': 'm3u8', }, }, { + 'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e', + 'info_dict': { + 'id': '0e8e040aec776862e1d632a699edf59e', + 'ext': 'mp4', + 'title': '0e8e040aec776862e1d632a699edf59e', + 'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', + }, + }, { 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'only_matching': True, }, { @@ -36,6 +44,9 @@ class CloudflareStreamIE(InfoExtractor): }, { 'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe', 'only_matching': True, + }, { + 'url': 
'https://watch.cloudflarestream.com/eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJraWQiOiJmYTA0YjViMzQ2NDkwYTM5NWJiNzQ1NWFhZTA2YzYwZSIsInN1YiI6Ijg4ZDQxMDhhMzY0MjA3M2VhYmFhZjg3ZGExODJkMjYzIiwiZXhwIjoxNjAwNjA5MzE5fQ.xkRJwLGkt0nZ%5F0BlPiwU7iW4pqb4lKkznbKfAhGg0tGcxSS6ZBA3lcTUwu7W%2DyCFbnAl%2Dhqk3Fn%5FqeQS%5FQydP27qTHpB9iIFFsMtk1tqzGZV5v4yrYDnwLSKzEKvVd6QwJnfABtxH2JdpSNuWlMUiVXFxGWgjOw6QeTNDDklTQYXV%5FNLV7sErSn5CeOPeRRkdXb%2D8ip%5FVOcfk1nDsFoOo4fctFtGP0wYMyY5ae8nhhatydHwevuvJCcEvEfh%2D4qjq9mCZOodevmtSQ4YWmggf4BxtWnDWYrGW8Otp6oqezrR8oY4%2DbKdV6PaqBj49aJdcls6xK7PmM8%5Fvjy3xfm0Mg', + 'only_matching': True, }] _WEBPAGE_TESTS = [{ 'url': 'https://upride.cc/incident/shoulder-pass-at-light/', @@ -53,7 +64,7 @@ class CloudflareStreamIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' - base_url = 'https://%s/%s/' % (domain, video_id) + base_url = f'https://{domain}/{video_id}/' if '.' in video_id: video_id = self._parse_json(base64.urlsafe_b64decode( video_id.split('.')[1] + '==='), video_id)['sub'] diff --git a/yt_dlp/extractor/cloudycdn.py b/yt_dlp/extractor/cloudycdn.py index e6e470e..6e757d7 100644 --- a/yt_dlp/extractor/cloudycdn.py +++ b/yt_dlp/extractor/cloudycdn.py @@ -1,3 +1,5 @@ +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -22,7 +24,7 @@ class CloudyCDNIE(InfoExtractor): 'upload_date': '20231121', 'title': 'D23-6000-105_cetstud', 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg', - } + }, }, { 'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1', 'md5': '798828a479151e2444d8dcfbec76e482', @@ -34,7 +36,21 @@ class CloudyCDNIE(InfoExtractor): 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg', 'duration': 1205, 'upload_date': '20221130', - } + }, + }, { + # Video-only m3u8 formats need manual fixup + 'url': 
'https://embed.cloudycdn.services/ltv/media/08j_d24-6000-074', + 'md5': 'fc472e40f6e6238446509be411c920e2', + 'info_dict': { + 'id': '08j_d24-6000-074', + 'ext': 'mp4', + 'upload_date': '20240620', + 'duration': 1673, + 'title': 'D24-6000-074-cetstud', + 'timestamp': 1718902233, + 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg', + }, + 'params': {'format': 'bv'}, }] _WEBPAGE_TESTS = [{ 'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/', @@ -47,7 +63,7 @@ class CloudyCDNIE(InfoExtractor): 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg', 'timestamp': 1677181513, 'title': 'LIB-2', - } + }, }] def _real_extract(self, url): @@ -63,6 +79,9 @@ class CloudyCDNIE(InfoExtractor): formats, subtitles = [], {} for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})): fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False) + for fmt in fmts: + if re.search(r'chunklist_b\d+_vo_', fmt['url']): + fmt['acodec'] = 'none' formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) diff --git a/yt_dlp/extractor/clubic.py b/yt_dlp/extractor/clubic.py index 716f259..c908e61 100644 --- a/yt_dlp/extractor/clubic.py +++ b/yt_dlp/extractor/clubic.py @@ -18,7 +18,7 @@ class ClubicIE(InfoExtractor): 'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité', 'description': 're:Gueule de bois chez Nokia. 
Le constructeur a indiqué cette.*', 'thumbnail': r're:^http://img\.clubic\.com/.*\.jpg$', - } + }, }, { 'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html', 'only_matching': True, @@ -27,7 +27,7 @@ class ClubicIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id + player_url = f'http://player.m6web.fr/v1/player/clubic/{video_id}.html' player_page = self._download_webpage(player_url, video_id) config = self._parse_json(self._search_regex( diff --git a/yt_dlp/extractor/clyp.py b/yt_dlp/extractor/clyp.py index 273d002..2702427 100644 --- a/yt_dlp/extractor/clyp.py +++ b/yt_dlp/extractor/clyp.py @@ -58,13 +58,13 @@ class ClypIE(InfoExtractor): query['token'] = token metadata = self._download_json( - 'https://api.clyp.it/%s' % audio_id, audio_id, query=query) + f'https://api.clyp.it/{audio_id}', audio_id, query=query) formats = [] for secure in ('', 'Secure'): for ext in ('Ogg', 'Mp3'): - format_id = '%s%s' % (secure, ext) - format_url = metadata.get('%sUrl' % format_id) + format_id = f'{secure}{ext}' + format_url = metadata.get(f'{format_id}Url') if format_url: formats.append({ 'url': format_url, diff --git a/yt_dlp/extractor/cmt.py b/yt_dlp/extractor/cmt.py index 6359102..8e53b7f 100644 --- a/yt_dlp/extractor/cmt.py +++ b/yt_dlp/extractor/cmt.py @@ -1,6 +1,6 @@ from .mtv import MTVIE -# TODO Remove - Reason: Outdated Site +# TODO: Remove - Reason: Outdated Site class CMTIE(MTVIE): # XXX: Do not subclass from concrete IE @@ -52,4 +52,4 @@ class CMTIE(MTVIE): # XXX: Do not subclass from concrete IE video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) mgid = self._extract_mgid(webpage, url) - return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) + return self.url_result(f'http://media.mtvnservices.com/embed/{mgid}') diff --git a/yt_dlp/extractor/cnn.py 
b/yt_dlp/extractor/cnn.py index 61b62fa..fe7615a 100644 --- a/yt_dlp/extractor/cnn.py +++ b/yt_dlp/extractor/cnn.py @@ -26,7 +26,7 @@ class CNNIE(TurnerBaseIE): 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', 'ext': 'mp4', 'title': "Student's epic speech stuns new freshmen", - 'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", + 'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."', 'upload_date': '20130821', }, 'expected_warnings': ['Failed to download m3u8 information'], @@ -161,7 +161,7 @@ class CNNIndonesiaIE(InfoExtractor): 'release_timestamp': 1662859088, 'release_date': '20220911', 'uploader': 'Asfahan Yahsyi', - } + }, }, { 'url': 'https://www.cnnindonesia.com/internasional/20220911104341-139-846189/video-momen-charles-disambut-meriah-usai-dilantik-jadi-raja-inggris', 'info_dict': { @@ -178,7 +178,7 @@ class CNNIndonesiaIE(InfoExtractor): 'release_date': '20220911', 'uploader': 'REUTERS', 'release_timestamp': 1662869995, - } + }, }] def _real_extract(self, url): @@ -194,5 +194,5 @@ class CNNIndonesiaIE(InfoExtractor): '_type': 'url_transparent', 'url': embed_url, 'upload_date': upload_date, - 'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', ')) + 'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', ')), }) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 1d2c443..f63bd78 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -60,7 +60,6 @@ from ..utils import ( determine_ext, dict_get, encode_data_uri, - error_to_compat_str, extract_attributes, filter_dict, fix_xml_ampersands, @@ -235,7 +234,14 @@ class InfoExtractor: 'maybe' if the format may have DRM and has to be tested before download. 
* extra_param_to_segment_url A query string to append to each fragment's URL, or to update each existing query string - with. Only applied by the native HLS/DASH downloaders. + with. If it is an HLS stream with an AES-128 decryption key, + the query paramaters will be passed to the key URI as well, + unless there is an `extra_param_to_key_url` given, + or unless an external key URI is provided via `hls_aes`. + Only applied by the native HLS/DASH downloaders. + * extra_param_to_key_url A query string to append to the URL + of the format's HLS AES-128 decryption key. + Only applied by the native HLS downloader. * hls_aes A dictionary of HLS AES-128 decryption information used by the native HLS downloader to override the values in the media playlist when an '#EXT-X-KEY' tag @@ -767,8 +773,8 @@ class InfoExtractor: self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._x_forwarded_for_ip: self.report_warning( - 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.' - % (self._x_forwarded_for_ip, country_code.upper())) + 'Video is geo restricted. Retrying extraction with fake IP ' + f'{self._x_forwarded_for_ip} ({country_code.upper()}) as X-Forwarded-For.') return True return False @@ -841,7 +847,7 @@ class InfoExtractor: if not self._downloader._first_webpage_request: sleep_interval = self.get_param('sleep_interval_requests') or 0 if sleep_interval > 0: - self.to_screen('Sleeping %s seconds ...' 
% sleep_interval) + self.to_screen(f'Sleeping {sleep_interval} seconds ...') time.sleep(sleep_interval) else: self._downloader._first_webpage_request = False @@ -898,7 +904,7 @@ class InfoExtractor: if errnote is None: errnote = 'Unable to download webpage' - errmsg = f'{errnote}: {error_to_compat_str(err)}' + errmsg = f'{errnote}: {err}' if fatal: raise ExtractorError(errmsg, cause=err) else: @@ -987,7 +993,7 @@ class InfoExtractor: r'<iframe src="([^"]+)"', content, 'Websense information URL', default=None) if blocked_iframe: - msg += ' Visit %s for more details' % blocked_iframe + msg += f' Visit {blocked_iframe} for more details' raise ExtractorError(msg, expected=True) if '<title>The URL you requested has been blocked</title>' in first_block: msg = ( @@ -997,7 +1003,7 @@ class InfoExtractor: r'</h1><p>(.*?)</p>', content, 'block message', default=None) if block_msg: - msg += ' (Message: "%s")' % block_msg.replace('\n', ' ') + msg += ' (Message: "{}")'.format(block_msg.replace('\n', ' ')) raise ExtractorError(msg, expected=True) if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and 'blocklist.rkn.gov.ru' in content): @@ -1012,7 +1018,7 @@ class InfoExtractor: basen = join_nonempty(video_id, data, url, delim='_') trim_length = self.get_param('trim_file_name') or 240 if len(basen) > trim_length: - h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() + h = '___' + hashlib.md5(basen.encode()).hexdigest() basen = basen[:trim_length - len(h)] + h filename = sanitize_filename(f'{basen}.dump', restricted=True) # Working around MAX_PATH limitation on Windows (see @@ -1063,7 +1069,7 @@ class InfoExtractor: if transform_source: xml_string = transform_source(xml_string) try: - return compat_etree_fromstring(xml_string.encode('utf-8')) + return compat_etree_fromstring(xml_string.encode()) except xml.etree.ElementTree.ParseError as ve: self.__print_error('Failed to parse XML' if errnote is None else errnote, fatal, video_id, ve) @@ -1214,11 +1220,11 @@ 
class InfoExtractor: def report_extraction(self, id_or_name): """Report information extraction.""" - self.to_screen('%s: Extracting information' % id_or_name) + self.to_screen(f'{id_or_name}: Extracting information') def report_download_webpage(self, video_id): """Report webpage download.""" - self.to_screen('%s: Downloading webpage' % video_id) + self.to_screen(f'{video_id}: Downloading webpage') def report_age_confirmation(self): """Report attempt to confirm age.""" @@ -1324,9 +1330,9 @@ class InfoExtractor: elif default is not NO_DEFAULT: return default elif fatal: - raise RegexNotFoundError('Unable to extract %s' % _name) + raise RegexNotFoundError(f'Unable to extract {_name}') else: - self.report_warning('unable to extract %s' % _name + bug_reports_message()) + self.report_warning(f'unable to extract {_name}' + bug_reports_message()) return None def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='', @@ -1425,14 +1431,14 @@ class InfoExtractor: if tfa is not None: return tfa - return getpass.getpass('Type %s and press [Return]: ' % note) + return getpass.getpass(f'Type {note} and press [Return]: ') # Helper functions for extracting OpenGraph info @staticmethod def _og_regexes(prop): content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))' - property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)' - % {'prop': re.escape(prop), 'sep': '(?::|[:-])'}) + property_re = r'(?:name|property)=(?:\'og{sep}{prop}\'|"og{sep}{prop}"|\s*og{sep}{prop}\b)'.format( + prop=re.escape(prop), sep='(?::|[:-])') template = r'<meta[^>]+?%s[^>]+?%s' return [ template % (property_re, content_re), @@ -1441,14 +1447,14 @@ class InfoExtractor: @staticmethod def _meta_regex(prop): - return r'''(?isx)<meta - (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1) - [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop) + return rf'''(?isx)<meta + 
(?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?){re.escape(prop)}\1) + [^>]+?content=(["\'])(?P<content>.*?)\2''' def _og_search_property(self, prop, html, name=None, **kargs): prop = variadic(prop) if name is None: - name = 'OpenGraph %s' % prop[0] + name = f'OpenGraph {prop[0]}' og_regexes = [] for p in prop: og_regexes.extend(self._og_regexes(p)) @@ -1571,7 +1577,7 @@ class InfoExtractor: elif fatal: raise RegexNotFoundError('Unable to extract JSON-LD') else: - self.report_warning('unable to extract JSON-LD %s' % bug_reports_message()) + self.report_warning(f'unable to extract JSON-LD {bug_reports_message()}') return {} def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None): @@ -1593,8 +1599,8 @@ class InfoExtractor: } def is_type(e, *expected_types): - type = variadic(traverse_obj(e, '@type')) - return any(x in type for x in expected_types) + type_ = variadic(traverse_obj(e, '@type')) + return any(x in type_ for x in expected_types) def extract_interaction_type(e): interaction_type = e.get('interactionType') @@ -1623,7 +1629,7 @@ class InfoExtractor: count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1]) if not count_kind: continue - count_key = '%s_count' % count_kind + count_key = f'{count_kind}_count' if info.get(count_key) is not None: continue info[count_key] = interaction_count @@ -1635,7 +1641,7 @@ class InfoExtractor: 'end_time': part.get('endOffset'), } for part in variadic(e.get('hasPart') or []) if part.get('@type') == 'Clip'] for idx, (last_c, current_c, next_c) in enumerate(zip( - [{'end_time': 0}] + chapters, chapters, chapters[1:])): + [{'end_time': 0}, *chapters], chapters, chapters[1:])): current_c['end_time'] = current_c['end_time'] or next_c['start_time'] current_c['start_time'] = current_c['start_time'] or last_c['end_time'] if None in current_c.values(): @@ -1776,9 +1782,9 @@ class InfoExtractor: def _hidden_inputs(html): html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html) hidden_inputs = {} - for input 
in re.findall(r'(?i)(<input[^>]+>)', html): - attrs = extract_attributes(input) - if not input: + for input_el in re.findall(r'(?i)(<input[^>]+>)', html): + attrs = extract_attributes(input_el) + if not input_el: continue if attrs.get('type') not in ('hidden', 'submit'): continue @@ -1790,8 +1796,8 @@ class InfoExtractor: def _form_hidden_inputs(self, form_id, html): form = self._search_regex( - r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id, - html, '%s form' % form_id, group='form') + rf'(?is)<form[^>]+?id=(["\']){form_id}\1[^>]*>(?P<form>.+?)</form>', + html, f'{form_id} form', group='form') return self._hidden_inputs(form) @classproperty(cache=True) @@ -1821,7 +1827,7 @@ class InfoExtractor: formats[:] = filter( lambda f: self._is_valid_url( f['url'], video_id, - item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'), + item='{} video format'.format(f.get('format_id')) if f.get('format_id') else 'video'), formats) @staticmethod @@ -1837,15 +1843,14 @@ class InfoExtractor: def _is_valid_url(self, url, video_id, item='video', headers={}): url = self._proto_relative_url(url, scheme='http:') # For now assume non HTTP(S) URLs always valid - if not (url.startswith('http://') or url.startswith('https://')): + if not url.startswith(('http://', 'https://')): return True try: - self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers) + self._request_webpage(url, video_id, f'Checking {item} URL', headers=headers) return True except ExtractorError as e: self.to_screen( - '%s: %s URL is invalid, skipping: %s' - % (video_id, item, error_to_compat_str(e.cause))) + f'{video_id}: {item} URL is invalid, skipping: {e.cause!s}') return False def http_scheme(self): @@ -1899,8 +1904,8 @@ class InfoExtractor: # currently yt-dlp cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in 
akamai_pv.text: - playerVerificationChallenge = akamai_pv.text.split(';')[0] - if playerVerificationChallenge.strip() != '': + player_verification_challenge = akamai_pv.text.split(';')[0] + if player_verification_challenge.strip() != '': return [] formats = [] @@ -1946,7 +1951,7 @@ class InfoExtractor: if not media_url: continue manifest_url = ( - media_url if media_url.startswith('http://') or media_url.startswith('https://') + media_url if media_url.startswith(('http://', 'https://')) else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url)) # If media_url is itself a f4m manifest do the recursive extraction # since bitrates in parent manifest (this one) and media_url manifest @@ -2007,7 +2012,7 @@ class InfoExtractor: def _report_ignoring_subs(self, name): self.report_warning(bug_reports_message( f'Ignoring subtitle tracks found in the {name} manifest; ' - 'if any subtitle tracks are missing,' + 'if any subtitle tracks are missing,', ), only_once=True) def _extract_m3u8_formats(self, *args, **kwargs): @@ -2098,7 +2103,7 @@ class InfoExtractor: formats = [{ 'format_id': join_nonempty(m3u8_id, idx), 'format_index': idx, - 'url': m3u8_url or encode_data_uri(m3u8_doc.encode('utf-8'), 'application/x-mpegurl'), + 'url': m3u8_url or encode_data_uri(m3u8_doc.encode(), 'application/x-mpegurl'), 'ext': ext, 'protocol': entry_protocol, 'preference': preference, @@ -2217,6 +2222,11 @@ class InfoExtractor: 'quality': quality, 'has_drm': has_drm, } + + # YouTube-specific + if yt_audio_content_id := last_stream_inf.get('YT-EXT-AUDIO-CONTENT-ID'): + f['language'] = yt_audio_content_id.split('.')[0] + resolution = last_stream_inf.get('RESOLUTION') if resolution: mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution) @@ -2310,7 +2320,7 @@ class InfoExtractor: if not c or c == '.': out.append(c) else: - out.append('{%s}%s' % (namespace, c)) + out.append(f'{{{namespace}}}{c}') return '/'.join(out) def 
_extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None): @@ -2507,7 +2517,7 @@ class InfoExtractor: imgs_count += 1 formats.append({ - 'format_id': 'imagestream-%d' % (imgs_count), + 'format_id': f'imagestream-{imgs_count}', 'url': src, 'ext': mimetype2ext(medium.get('type')), 'acodec': 'none', @@ -2525,7 +2535,7 @@ class InfoExtractor: def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): urls = [] subtitles = {} - for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))): + for textstream in smil.findall(self._xpath_ns('.//textstream', namespace)): src = textstream.get('src') if not src or src in urls: continue @@ -2656,7 +2666,7 @@ class InfoExtractor: if subtitles and period['subtitles']: self.report_warning(bug_reports_message( 'Found subtitles in multiple periods in the DASH manifest; ' - 'if part of the subtitles are missing,' + 'if part of the subtitles are missing,', ), only_once=True) for sub_lang, sub_info in period['subtitles'].items(): @@ -2782,7 +2792,7 @@ class InfoExtractor: elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'): content_type = 'text' else: - self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) + self.report_warning(f'Unknown MIME type {mime_type} in DASH manifest') continue base_url = '' @@ -2820,10 +2830,10 @@ class InfoExtractor: 'asr': int_or_none(representation_attrib.get('audioSamplingRate')), 'fps': int_or_none(representation_attrib.get('frameRate')), 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None, - 'format_note': 'DASH %s' % content_type, + 'format_note': f'DASH {content_type}', 'filesize': filesize, 'container': mimetype2ext(mime_type) + '_dash', - **codecs + **codecs, } elif content_type == 'text': f = { @@ -2864,8 +2874,8 @@ class InfoExtractor: t += c # Next, $...$ templates are translated to their # %(...) 
counterparts to be used with % operator - t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t) - t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t) + t = re.sub(r'\$({})\$'.format('|'.join(identifiers)), r'%(\1)d', t) + t = re.sub(r'\$({})%([^$]+)\$'.format('|'.join(identifiers)), r'%(\1)\2', t) t.replace('$$', '$') return t @@ -2928,12 +2938,12 @@ class InfoExtractor: 'duration': float_or_none(segment_d, representation_ms_info['timescale']), }) - for num, s in enumerate(representation_ms_info['s']): + for s in representation_ms_info['s']: segment_time = s.get('t') or segment_time segment_d = s['d'] add_segment_url() segment_number += 1 - for r in range(s.get('r', 0)): + for _ in range(s.get('r', 0)): segment_time += segment_d add_segment_url() segment_number += 1 @@ -2947,7 +2957,7 @@ class InfoExtractor: timescale = representation_ms_info['timescale'] for s in representation_ms_info['s']: duration = float_or_none(s['d'], timescale) - for r in range(s.get('r', 0) + 1): + for _ in range(s.get('r', 0) + 1): segment_uri = representation_ms_info['segment_urls'][segment_index] fragments.append({ location_key(segment_uri): segment_uri, @@ -3055,7 +3065,7 @@ class InfoExtractor: fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag')) # TODO: add support for WVC1 and WMAP if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML', 'EC-3'): - self.report_warning('%s is not a supported codec' % fourcc) + self.report_warning(f'{fourcc} is not a supported codec') continue tbr = int(track.attrib['Bitrate']) // 1000 # [1] does not mention Width and Height attributes. 
However, @@ -3104,7 +3114,7 @@ class InfoExtractor: 'fourcc': fourcc, 'language': stream_language, 'codec_private_data': track.get('CodecPrivateData'), - } + }, }) elif stream_type in ('video', 'audio'): formats.append({ @@ -3186,13 +3196,13 @@ class InfoExtractor: _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)' media_tags = [(media_tag, media_tag_name, media_type, '') for media_tag, media_tag_name, media_type - in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)] + in re.findall(rf'(?s)(<({_MEDIA_TAG_NAME_RE})[^>]*/>)', webpage)] media_tags.extend(re.findall( # We only allow video|audio followed by a whitespace or '>'. # Allowing more characters may end up in significant slow down (see # https://github.com/ytdl-org/youtube-dl/issues/11979, # e.g. http://www.porntrex.com/maps/videositemap.xml). - r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage)) + rf'(?s)(<(?P<tag>{_MEDIA_TAG_NAME_RE})(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage)) for media_tag, _, media_type, media_content in media_tags: media_info = { 'formats': [], @@ -3336,13 +3346,13 @@ class InfoExtractor: mobj = re.search( r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url) url_base = mobj.group('url') - http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base) + http_base_url = '{}{}:{}'.format('http', mobj.group('s') or '', url_base) formats = [] def manifest_url(manifest): m_url = f'{http_base_url}/{manifest}' if query: - m_url += '?%s' % query + m_url += f'?{query}' return m_url if 'm3u8' not in skip_protocols: @@ -3364,7 +3374,7 @@ class InfoExtractor: video_id, fatal=False) for rtmp_format in rtmp_formats: rtsp_format = rtmp_format.copy() - rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) + rtsp_format['url'] = '{}/{}'.format(rtmp_format['url'], rtmp_format['play_path']) del rtsp_format['play_path'] del rtsp_format['ext'] rtsp_format.update({ @@ -3431,7 +3441,7 @@ class InfoExtractor: if not 
track_url: continue subtitles.setdefault(track.get('label') or 'en', []).append({ - 'url': self._proto_relative_url(track_url) + 'url': self._proto_relative_url(track_url), }) entry = { @@ -3510,7 +3520,7 @@ class InfoExtractor: 'tbr': int_or_none(source.get('bitrate'), scale=1000), 'filesize': int_or_none(source.get('filesize')), 'ext': ext, - 'format_id': format_id + 'format_id': format_id, } if source_url.startswith('rtmp'): a_format['ext'] = 'flv' @@ -3584,7 +3594,7 @@ class InfoExtractor: continue cookies = cookies.encode('iso-8859-1').decode('utf-8') cookie_value = re.search( - r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies) + rf'{cookie}=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)', cookies) if cookie_value: value, domain = cookie_value.groups() self._set_cookie(domain, cookie, value) @@ -3668,7 +3678,7 @@ class InfoExtractor: desc += ' (**Currently broken**)' if markdown else ' (Currently broken)' # Escape emojis. Ref: https://github.com/github/markup/issues/1153 - name = (' - **%s**' % re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME)) if markdown else cls.IE_NAME + name = (' - **{}**'.format(re.sub(r':(\w+:)', ':\u200B\\g<1>', cls.IE_NAME))) if markdown else cls.IE_NAME return f'{name}:{desc}' if desc else name def extract_subtitles(self, *args, **kwargs): @@ -3708,7 +3718,7 @@ class InfoExtractor: self.to_screen(f'Extracted {comment_count} comments') return { 'comments': comments, - 'comment_count': None if interrupted else comment_count + 'comment_count': None if interrupted else comment_count, } return extractor @@ -3812,9 +3822,9 @@ class InfoExtractor: @staticmethod def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None): - all_known = all(map( - lambda x: x is not None, - (is_private, needs_premium, needs_subscription, needs_auth, is_unlisted))) + all_known = all( + x is not None for x in + (is_private, needs_premium, needs_subscription, needs_auth, is_unlisted)) return ( 'private' if 
is_private else 'premium_only' if needs_premium @@ -3934,7 +3944,7 @@ class SearchInfoExtractor(InfoExtractor): @classproperty def _VALID_URL(cls): - return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY + return rf'{cls._SEARCH_KEY}(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' def _real_extract(self, query): prefix, query = self._match_valid_url(query).group('prefix', 'query') diff --git a/yt_dlp/extractor/commonmistakes.py b/yt_dlp/extractor/commonmistakes.py index 4514424..8ddb164 100644 --- a/yt_dlp/extractor/commonmistakes.py +++ b/yt_dlp/extractor/commonmistakes.py @@ -16,10 +16,10 @@ class CommonMistakesIE(InfoExtractor): def _real_extract(self, url): msg = ( - 'You\'ve asked yt-dlp to download the URL "%s". ' + f'You\'ve asked yt-dlp to download the URL "{url}". ' 'That doesn\'t make any sense. ' 'Simply remove the parameter in your command or configuration.' - ) % url + ) if not self.get_param('verbose'): msg += ' Add -v to the command line to see what arguments and configuration yt-dlp has' raise ExtractorError(msg, expected=True) @@ -38,7 +38,7 @@ class UnicodeBOMIE(InfoExtractor): real_url = self._match_id(url) self.report_warning( 'Your URL starts with a Byte Order Mark (BOM). ' - 'Removing the BOM and looking for "%s" ...' 
% real_url) + f'Removing the BOM and looking for "{real_url}" ...') return self.url_result(real_url) diff --git a/yt_dlp/extractor/commonprotocols.py b/yt_dlp/extractor/commonprotocols.py index 2f93e8e..7b3a5b6 100644 --- a/yt_dlp/extractor/commonprotocols.py +++ b/yt_dlp/extractor/commonprotocols.py @@ -63,7 +63,7 @@ class ViewSourceIE(InfoExtractor): _TEST = { 'url': 'view-source:https://www.youtube.com/watch?v=BaW_jenozKc', - 'only_matching': True + 'only_matching': True, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/condenast.py b/yt_dlp/extractor/condenast.py index 3170c29..9c02cd3 100644 --- a/yt_dlp/extractor/condenast.py +++ b/yt_dlp/extractor/condenast.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, - compat_urlparse, -) from ..utils import ( determine_ext, extract_attributes, @@ -48,20 +45,20 @@ class CondeNastIE(InfoExtractor): 'wmagazine': 'W Magazine', } - _VALID_URL = r'''(?x)https?://(?:video|www|player(?:-backend)?)\.(?:%s)\.com/ + _VALID_URL = r'''(?x)https?://(?:video|www|player(?:-backend)?)\.(?:{})\.com/ (?: (?: embed(?:js)?| (?:script|inline)/video - )/(?P<id>[0-9a-f]{24})(?:/(?P<player_id>[0-9a-f]{24}))?(?:.+?\btarget=(?P<target>[^&]+))?| + )/(?P<id>[0-9a-f]{{24}})(?:/(?P<player_id>[0-9a-f]{{24}}))?(?:.+?\btarget=(?P<target>[^&]+))?| (?P<type>watch|series|video)/(?P<display_id>[^/?#]+) - )''' % '|'.join(_SITES.keys()) - IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) + )'''.format('|'.join(_SITES.keys())) + IE_DESC = 'Condé Nast media group: {}'.format(', '.join(sorted(_SITES.values()))) _EMBED_REGEX = [r'''(?x) <(?:iframe|script)[^>]+?src=(["\'])(?P<url> - (?:https?:)?//player(?:-backend)?\.(?:%s)\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+? - )\1''' % '|'.join(_SITES.keys())] + (?:https?:)?//player(?:-backend)?\.(?:{})\.com/(?:embed(?:js)?|(?:script|inline)/video)/.+? 
+ )\1'''.format('|'.join(_SITES.keys()))] _TESTS = [{ 'url': 'http://video.wired.com/watch/3d-printed-speakers-lit-with-led', @@ -74,7 +71,7 @@ class CondeNastIE(InfoExtractor): 'uploader': 'wired', 'upload_date': '20130314', 'timestamp': 1363219200, - } + }, }, { 'url': 'http://video.gq.com/watch/the-closer-with-keith-olbermann-the-only-true-surprise-trump-s-an-idiot?c=series', 'info_dict': { @@ -97,7 +94,7 @@ class CondeNastIE(InfoExtractor): 'uploader': 'arstechnica', 'upload_date': '20150916', 'timestamp': 1442434920, - } + }, }, { 'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player', 'only_matching': True, @@ -110,12 +107,12 @@ class CondeNastIE(InfoExtractor): title = self._html_search_regex( r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>', webpage, 'series title') - url_object = compat_urllib_parse_urlparse(url) - base_url = '%s://%s' % (url_object.scheme, url_object.netloc) + url_object = urllib.parse.urlparse(url) + base_url = f'{url_object.scheme}://{url_object.netloc}' m_paths = re.finditer( r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage) paths = orderedSet(m.group(1) for m in m_paths) - build_url = lambda path: compat_urlparse.urljoin(base_url, path) + build_url = lambda path: urllib.parse.urljoin(base_url, path) entries = [self.url_result(build_url(path), 'CondeNast') for path in paths] return self.playlist_result(entries, playlist_title=title) @@ -166,9 +163,9 @@ class CondeNastIE(InfoExtractor): video_id, 'Downloading loader info', query=params) if not video_info: info_page = self._download_webpage( - 'https://player.cnevids.com/inline/video/%s.js' % video_id, + f'https://player.cnevids.com/inline/video/{video_id}.js', video_id, 'Downloading inline info', query={ - 'target': params.get('target', 'embedplayer') + 'target': params.get('target', 'embedplayer'), }) if not video_info: @@ -192,7 +189,7 @@ class CondeNastIE(InfoExtractor): continue quality = fdata.get('quality') 
formats.append({ - 'format_id': ext + ('-%s' % quality if quality else ''), + 'format_id': ext + (f'-{quality}' if quality else ''), 'url': src, 'ext': ext, 'quality': 1 if quality == 'high' else 0, diff --git a/yt_dlp/extractor/contv.py b/yt_dlp/extractor/contv.py index d69e816..63d760a 100644 --- a/yt_dlp/extractor/contv.py +++ b/yt_dlp/extractor/contv.py @@ -73,7 +73,7 @@ class CONtvIE(InfoExtractor): captions = m_details.get('captions') or {} for caption_url in captions.values(): subtitles.setdefault('en', []).append({ - 'url': caption_url + 'url': caption_url, }) thumbnails = [] diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py index 0a98c98..4af2d18 100644 --- a/yt_dlp/extractor/corus.py +++ b/yt_dlp/extractor/corus.py @@ -69,16 +69,16 @@ class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE 'only_matching': True, }, { 'url': 'http://www.bigbrothercanada.ca/video/1457812035894/', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/', - 'only_matching': True + 'only_matching': True, }] _GEO_BYPASS = False _SITE_MAP = { @@ -97,7 +97,7 @@ class CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE if path != 'series': path = 'migration/' + path video = self._download_json( - 'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path, + f'https://globalcontent.corusappservices.com/templates/{path}/playlist/', video_id, query={'byId': video_id}, headers={'Accept': 'application/json'})[0] title = video['title'] @@ -108,7 +108,7 @@ class 
CorusIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE if not smil_url: continue source_type = source.get('type') - note = 'Downloading%s smil file' % (' ' + source_type if source_type else '') + note = 'Downloading{} smil file'.format(' ' + source_type if source_type else '') resp = self._download_webpage( smil_url, video_id, note, fatal=False, headers=self.geo_verification_headers()) diff --git a/yt_dlp/extractor/coub.py b/yt_dlp/extractor/coub.py index 9bab698..95ad665 100644 --- a/yt_dlp/extractor/coub.py +++ b/yt_dlp/extractor/coub.py @@ -44,11 +44,11 @@ class CoubIE(InfoExtractor): video_id = self._match_id(url) coub = self._download_json( - 'http://coub.com/api/v2/coubs/%s.json' % video_id, video_id) + f'http://coub.com/api/v2/coubs/{video_id}.json', video_id) if coub.get('error'): raise ExtractorError( - '%s said: %s' % (self.IE_NAME, coub['error']), expected=True) + '{} said: {}'.format(self.IE_NAME, coub['error']), expected=True) title = coub['title'] @@ -80,7 +80,7 @@ class CoubIE(InfoExtractor): continue formats.append({ 'url': item_url, - 'format_id': '%s-%s-%s' % (HTML5, kind, quality), + 'format_id': f'{HTML5}-{kind}-{quality}', 'filesize': int_or_none(item.get('size')), 'vcodec': 'none' if kind == 'audio' else None, 'acodec': 'none' if kind == 'video' else None, @@ -100,7 +100,7 @@ class CoubIE(InfoExtractor): if mobile_url: formats.append({ 'url': mobile_url, - 'format_id': '%s-audio' % MOBILE, + 'format_id': f'{MOBILE}-audio', 'source_preference': preference_key(MOBILE), }) diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py index 5ef5afc..b84dd2a 100644 --- a/yt_dlp/extractor/cozytv.py +++ b/yt_dlp/extractor/cozytv.py @@ -16,17 +16,17 @@ class CozyTVIE(InfoExtractor): 'was_live': True, 'duration': 7981, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] def _real_extract(self, url): uploader, date = self._match_valid_url(url).groups() - id = f'{uploader}-{date}' - data_json = 
self._download_json(f'https://api.cozy.tv/cache/{uploader}/replay/{date}', id) + video_id = f'{uploader}-{date}' + data_json = self._download_json(f'https://api.cozy.tv/cache/{uploader}/replay/{date}', video_id) formats, subtitles = self._extract_m3u8_formats_and_subtitles( - f'https://cozycdn.foxtrotstream.xyz/replays/{uploader}/{date}/index.m3u8', id, ext='mp4') + f'https://cozycdn.foxtrotstream.xyz/replays/{uploader}/{date}/index.m3u8', video_id, ext='mp4') return { - 'id': id, + 'id': video_id, 'title': data_json.get('title'), 'uploader': data_json.get('user') or uploader, 'upload_date': unified_strdate(data_json.get('date')), diff --git a/yt_dlp/extractor/cpac.py b/yt_dlp/extractor/cpac.py index 32bba1e..08d79a2 100644 --- a/yt_dlp/extractor/cpac.py +++ b/yt_dlp/extractor/cpac.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, str_or_none, @@ -38,11 +37,11 @@ class CPACIE(InfoExtractor): content = self._download_json( 'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/episode/index.xml&crafterSite=cpacca&id=' + video_id, video_id) - video_url = try_get(content, lambda x: x['page']['details']['videoUrl'], compat_str) + video_url = try_get(content, lambda x: x['page']['details']['videoUrl'], str) formats = [] if video_url: content = content['page'] - title = str_or_none(content['details']['title_%s_t' % (url_lang, )]) + title = str_or_none(content['details'][f'title_{url_lang}_t']) formats = self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4') for fmt in formats: # prefer language to match URL @@ -54,7 +53,7 @@ class CPACIE(InfoExtractor): else: fmt['language_preference'] = -10 - category = str_or_none(content['details']['category_%s_t' % (url_lang, )]) + category = str_or_none(content['details'][f'category_{url_lang}_t']) def is_live(v_type): return (v_type == 'live') if v_type is not None else None @@ -63,10 +62,10 @@ class CPACIE(InfoExtractor): 
'id': video_id, 'formats': formats, 'title': title, - 'description': str_or_none(content['details'].get('description_%s_t' % (url_lang, ))), + 'description': str_or_none(content['details'].get(f'description_{url_lang}_t')), 'timestamp': unified_timestamp(content['details'].get('liveDateTime')), 'categories': [category] if category else None, - 'thumbnail': urljoin(url, str_or_none(content['details'].get('image_%s_s' % (url_lang, )))), + 'thumbnail': urljoin(url, str_or_none(content['details'].get(f'image_{url_lang}_s'))), 'is_live': is_live(content['details'].get('type')), } @@ -110,27 +109,26 @@ class CPACPlaylistIE(InfoExtractor): url_lang = 'fr' if any(x in url for x in ('/emission?', '/rechercher?')) else 'en' pl_type, list_type = ('program', 'itemList') if any(x in url for x in ('/program?', '/emission?')) else ('search', 'searchResult') api_url = ( - 'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/%s/index.xml&crafterSite=cpacca&%s' - % (pl_type, video_id, )) + f'https://www.cpac.ca/api/1/services/contentModel.json?url=/site/website/{pl_type}/index.xml&crafterSite=cpacca&{video_id}') content = self._download_json(api_url, video_id) entries = [] total_pages = int_or_none(try_get(content, lambda x: x['page'][list_type]['totalPages']), default=1) for page in range(1, total_pages + 1): if page > 1: - api_url = update_url_query(api_url, {'page': '%d' % (page, ), }) + api_url = update_url_query(api_url, {'page': page}) content = self._download_json( api_url, video_id, - note='Downloading continuation - %d' % (page, ), + note=f'Downloading continuation - {page}', fatal=False) for item in try_get(content, lambda x: x['page'][list_type]['item'], list) or []: - episode_url = urljoin(url, try_get(item, lambda x: x['url_%s_s' % (url_lang, )])) + episode_url = urljoin(url, try_get(item, lambda x: x[f'url_{url_lang}_s'])) if episode_url: entries.append(episode_url) return self.playlist_result( (self.url_result(entry) for entry in entries), 
playlist_id=video_id, - playlist_title=try_get(content, lambda x: x['page']['program']['title_%s_t' % (url_lang, )]) or video_id.split('=')[-1], - playlist_description=try_get(content, lambda x: x['page']['program']['description_%s_t' % (url_lang, )]), + playlist_title=try_get(content, lambda x: x['page']['program'][f'title_{url_lang}_t']) or video_id.split('=')[-1], + playlist_description=try_get(content, lambda x: x['page']['program'][f'description_{url_lang}_t']), ) diff --git a/yt_dlp/extractor/cracked.py b/yt_dlp/extractor/cracked.py index c6aabcc..f227654 100644 --- a/yt_dlp/extractor/cracked.py +++ b/yt_dlp/extractor/cracked.py @@ -19,7 +19,7 @@ class CrackedIE(InfoExtractor): 'title': 'If Animal Actors Got E! True Hollywood Stories', 'timestamp': 1404954000, 'upload_date': '20140710', - } + }, }, { # youtube embed 'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html', @@ -32,7 +32,7 @@ class CrackedIE(InfoExtractor): 'upload_date': '20140725', 'uploader_id': 'Cracked', 'uploader': 'Cracked', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py index 0cb7d94..c4ceba9 100644 --- a/yt_dlp/extractor/crackle.py +++ b/yt_dlp/extractor/crackle.py @@ -45,7 +45,7 @@ class CrackleIE(InfoExtractor): 'skip_download': True, }, 'expected_warnings': [ - 'Trying with a list of known countries' + 'Trying with a list of known countries', ], }, { 'url': 'https://www.sonycrackle.com/thanksgiving/2510064', @@ -89,7 +89,7 @@ class CrackleIE(InfoExtractor): for num, country in enumerate(countries): if num == 1: # start hard-coded list self.report_warning('%s. 
Trying with a list of known countries' % ( - 'Unable to obtain video formats from %s API' % geo_bypass_country if geo_bypass_country + f'Unable to obtain video formats from {geo_bypass_country} API' if geo_bypass_country else 'No country code was given using --geo-bypass-country')) elif num == num_countries: # end of list geo_info = self._download_json( @@ -99,17 +99,17 @@ class CrackleIE(InfoExtractor): country = geo_info.get('CountryCode') if country is None: continue - self.to_screen('%s identified country as %s' % (self.IE_NAME, country)) + self.to_screen(f'{self.IE_NAME} identified country as {country}') if country in countries: - self.to_screen('Downloading from %s API was already attempted. Skipping...' % country) + self.to_screen(f'Downloading from {country} API was already attempted. Skipping...') continue if country is None: continue try: media = self._download_json( - 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country), - video_id, note='Downloading media JSON from %s API' % country, + f'https://web-api-us.crackle.com/Service.svc/details/media/{video_id}/{country}?disableProtocols=true', + video_id, note=f'Downloading media JSON from {country} API', errnote='Unable to download media JSON') except ExtractorError as e: # 401 means geo restriction, trying next country @@ -120,7 +120,7 @@ class CrackleIE(InfoExtractor): status = media.get('status') if status.get('messageCode') != '0': raise ExtractorError( - '%s said: %s %s - %s' % ( + '{} said: {} {} - {}'.format( self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')), expected=True) diff --git a/yt_dlp/extractor/craftsy.py b/yt_dlp/extractor/craftsy.py index 3a05ed4..0d7d759 100644 --- a/yt_dlp/extractor/craftsy.py +++ b/yt_dlp/extractor/craftsy.py @@ -56,7 +56,7 @@ class CraftsyIE(InfoExtractor): if not lessons and not has_access: self.report_warning( 'Only extracting preview. 
For the full class, pass cookies ' - + f'from an account that has access. {self._login_hint()}') + f'from an account that has access. {self._login_hint()}') lessons.append({'video_id': preview_id}) if not lessons and not has_access: diff --git a/yt_dlp/extractor/crooksandliars.py b/yt_dlp/extractor/crooksandliars.py index 2ee0730..abd3322 100644 --- a/yt_dlp/extractor/crooksandliars.py +++ b/yt_dlp/extractor/crooksandliars.py @@ -21,7 +21,7 @@ class CrooksAndLiarsIE(InfoExtractor): 'upload_date': '20150405', 'uploader': 'Heather', 'duration': 236, - } + }, }, { 'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA', 'only_matching': True, @@ -31,7 +31,7 @@ class CrooksAndLiarsIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://embed.crooksandliars.com/embed/%s' % video_id, video_id) + f'http://embed.crooksandliars.com/embed/{video_id}', video_id) manifest = self._search_json(r'var\s+manifest\s*=', webpage, 'manifest JSON', video_id) diff --git a/yt_dlp/extractor/crowdbunker.py b/yt_dlp/extractor/crowdbunker.py index d83c015..bf81457 100644 --- a/yt_dlp/extractor/crowdbunker.py +++ b/yt_dlp/extractor/crowdbunker.py @@ -24,15 +24,16 @@ class CrowdBunkerIE(InfoExtractor): 'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ', 'like_count': int, 'upload_date': '20211218', - 'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg' + 'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] def _real_extract(self, url): - id = self._match_id(url) - data_json = self._download_json(f'https://api.divulg.org/post/{id}/details', - id, headers={'accept': 'application/json, text/plain, */*'}) + video_id = self._match_id(url) + data_json = self._download_json( + f'https://api.divulg.org/post/{video_id}/details', video_id, + headers={'accept': 'application/json, text/plain, */*'}) video_json = data_json['video'] formats, 
subtitles = [], {} for sub in video_json.get('captions') or []: @@ -45,12 +46,12 @@ class CrowdBunkerIE(InfoExtractor): mpd_url = try_get(video_json, lambda x: x['dashManifest']['url']) if mpd_url: - fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, id) + fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id) formats.extend(fmts) subtitles = self._merge_subtitles(subtitles, subs) m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url']) if m3u8_url: - fmts, subs = self._extract_m3u8_formats_and_subtitles(mpd_url, id) + fmts, subs = self._extract_m3u8_formats_and_subtitles(mpd_url, video_id) formats.extend(fmts) subtitles = self._merge_subtitles(subtitles, subs) @@ -61,7 +62,7 @@ class CrowdBunkerIE(InfoExtractor): } for image in video_json.get('thumbnails') or [] if image.get('url')] return { - 'id': id, + 'id': video_id, 'title': video_json.get('title'), 'description': video_json.get('description'), 'view_count': video_json.get('viewCount'), @@ -87,23 +88,24 @@ class CrowdBunkerChannelIE(InfoExtractor): }, }] - def _entries(self, id): + def _entries(self, playlist_id): last = None for page in itertools.count(): channel_json = self._download_json( - f'https://api.divulg.org/organization/{id}/posts', id, headers={'accept': 'application/json, text/plain, */*'}, + f'https://api.divulg.org/organization/{playlist_id}/posts', playlist_id, + headers={'accept': 'application/json, text/plain, */*'}, query={'after': last} if last else {}, note=f'Downloading Page {page}') for item in channel_json.get('items') or []: v_id = item.get('uid') if not v_id: continue yield self.url_result( - 'https://crowdbunker.com/v/%s' % v_id, ie=CrowdBunkerIE.ie_key(), video_id=v_id) + f'https://crowdbunker.com/v/{v_id}', ie=CrowdBunkerIE.ie_key(), video_id=v_id) last = channel_json.get('last') if not last: break def _real_extract(self, url): - id = self._match_id(url) - return self.playlist_result(self._entries(id), playlist_id=id) + playlist_id = 
self._match_id(url) + return self.playlist_result(self._entries(playlist_id), playlist_id=playlist_id) diff --git a/yt_dlp/extractor/crtvg.py b/yt_dlp/extractor/crtvg.py index 21325e3..6d9a778 100644 --- a/yt_dlp/extractor/crtvg.py +++ b/yt_dlp/extractor/crtvg.py @@ -17,7 +17,7 @@ class CrtvgIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.(?:jpg|png)', '_old_archive_ids': ['crtvg 5839623'], }, - 'params': {'skip_download': 'm3u8'} + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.crtvg.es/tvg/a-carta/a-parabolica-love-story', 'md5': '9a47b95a1749db7b7eb3214904624584', @@ -28,7 +28,7 @@ class CrtvgIE(InfoExtractor): 'description': 'md5:f71cfba21ae564f0a6f415b31de1f842', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', }, - 'params': {'skip_download': 'm3u8'} + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index ea54f01..1b124c6 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -442,7 +442,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE): return { 'id': data['id'], 'title': ' \u2013 '.join(( - ('%s%s' % ( + ('{}{}'.format( format_field(metadata, 'season_title'), format_field(metadata, 'episode', ' Episode %s'))), format_field(data, 'title'))), @@ -519,7 +519,7 @@ class CrunchyrollBetaShowIE(CrunchyrollCmsBaseIE): seasons_response = self._call_cms_api_signed(f'seasons?series_id={internal_id}', internal_id, lang, 'seasons') for season in traverse_obj(seasons_response, ('items', ..., {dict})): episodes_response = self._call_cms_api_signed( - f'episodes?season_id={season["id"]}', season["id"], lang, 'episode list') + f'episodes?season_id={season["id"]}', season['id'], lang, 'episode list') for episode_response in traverse_obj(episodes_response, ('items', ..., {dict})): yield self.url_result( f'{self._BASE_URL}/{lang}watch/{episode_response["id"]}', @@ -535,7 +535,7 @@ class CrunchyrollBetaShowIE(CrunchyrollCmsBaseIE): 
'url': ('source', {url_or_none}), 'width': ('width', {int_or_none}), 'height': ('height', {int_or_none}), - }) + }), }))) diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py index e56584e..e940c2d 100644 --- a/yt_dlp/extractor/cspan.py +++ b/yt_dlp/extractor/cspan.py @@ -43,7 +43,7 @@ class CSpanIE(InfoExtractor): 'ext': 'mp4', 'title': 'CSPAN - International Health Care Models', 'description': 'md5:7a985a2d595dba00af3d9c9f0783c967', - } + }, }, { 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall', 'info_dict': { @@ -61,7 +61,7 @@ class CSpanIE(InfoExtractor): }, 'params': { 'skip_download': True, # m3u8 downloads - } + }, }, { # Ustream embedded video 'url': 'https://www.c-span.org/video/?114917-1/armed-services', @@ -151,7 +151,7 @@ class CSpanIE(InfoExtractor): # Obsolete # We first look for clipid, because clipprog always appears before - patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] + patterns = [rf'id=\'clip({t})\'\s*value=\'([0-9]+)\'' for t in ('id', 'prog')] results = list(filter(None, (re.search(p, webpage) for p in patterns))) if results: matches = results[0] @@ -183,13 +183,13 @@ class CSpanIE(InfoExtractor): return d.get(attr, {}).get('#text') data = self._download_json( - 'http://www.c-span.org/assets/player/ajax-player.php?os=android&html5=%s&id=%s' % (video_type, video_id), + f'http://www.c-span.org/assets/player/ajax-player.php?os=android&html5={video_type}&id={video_id}', video_id)['video'] if data['@status'] != 'Success': - raise ExtractorError('%s said: %s' % (self.IE_NAME, get_text_attr(data, 'error')), expected=True) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, get_text_attr(data, 'error')), expected=True) doc = self._download_xml( - 'http://www.c-span.org/common/services/flashXml.php?%sid=%s' % (video_type, video_id), + f'http://www.c-span.org/common/services/flashXml.php?{video_type}id={video_id}', video_id) description = 
self._html_search_meta('description', webpage) @@ -205,7 +205,7 @@ class CSpanIE(InfoExtractor): formats = [] for quality in f.get('qualities', []): formats.append({ - 'format_id': '%s-%sp' % (get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')), + 'format_id': '{}-{}p'.format(get_text_attr(quality, 'bitrate'), get_text_attr(quality, 'height')), 'url': unescapeHTML(get_text_attr(quality, 'file')), 'height': int_or_none(get_text_attr(quality, 'height')), 'tbr': int_or_none(get_text_attr(quality, 'bitrate')), @@ -216,13 +216,13 @@ class CSpanIE(InfoExtractor): continue formats = self._extract_m3u8_formats( path, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }] + m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path}] add_referer(formats) entries.append({ - 'id': '%s_%d' % (video_id, partnum + 1), + 'id': f'{video_id}_{partnum + 1}', 'title': ( title if len(files) == 1 else - '%s part %d' % (title, partnum + 1)), + f'{title} part {partnum + 1}'), 'formats': formats, 'description': description, 'thumbnail': thumbnail, @@ -230,7 +230,7 @@ class CSpanIE(InfoExtractor): 'subtitles': { 'en': [{ 'url': capfile, - 'ext': determine_ext(capfile, 'dfxp') + 'ext': determine_ext(capfile, 'dfxp'), }], } if capfile else None, }) @@ -257,8 +257,8 @@ class CSpanCongressIE(InfoExtractor): 'title': 'Congressional Chronicle - Members of Congress, Hearings and More', 'description': 'md5:54c264b7a8f219937987610243305a84', 'thumbnail': r're:https://ximage.c-spanvideo.org/.+', - 'ext': 'mp4' - } + 'ext': 'mp4', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/ctsnews.py b/yt_dlp/extractor/ctsnews.py index 1817bd2..b249c7b 100644 --- a/yt_dlp/extractor/ctsnews.py +++ b/yt_dlp/extractor/ctsnews.py @@ -16,7 +16,7 @@ class CtsNewsIE(InfoExtractor): 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...', 
'timestamp': 1422528540, 'upload_date': '20150129', - } + }, }, { # News count not appear on page but still available in database 'url': 'http://news.cts.com.tw/cts/international/201309/201309031304098.html', @@ -29,7 +29,7 @@ class CtsNewsIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1378205880, 'upload_date': '20130903', - } + }, }, { # With Youtube embedded video 'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html', diff --git a/yt_dlp/extractor/ctv.py b/yt_dlp/extractor/ctv.py index f125c1c..a41dab1 100644 --- a/yt_dlp/extractor/ctv.py +++ b/yt_dlp/extractor/ctv.py @@ -41,9 +41,9 @@ class CTVIE(InfoExtractor): } } } -}''' % display_id, +}''' % display_id, # noqa: UP031 })['data']['resolvedPath']['lastSegment']['content'] video_id = content['axisId'] return self.url_result( - '9c9media:%s:%s' % (content['videoPlayerDestCode'], video_id), + '9c9media:{}:{}'.format(content['videoPlayerDestCode'], video_id), 'NineCNineMedia', video_id) diff --git a/yt_dlp/extractor/ctvnews.py b/yt_dlp/extractor/ctvnews.py index ad3f0d8..ebed9eb 100644 --- a/yt_dlp/extractor/ctvnews.py +++ b/yt_dlp/extractor/ctvnews.py @@ -16,7 +16,7 @@ class CTVNewsIE(InfoExtractor): 'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285', 'timestamp': 1467286284, 'upload_date': '20160630', - } + }, }, { 'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224', 'info_dict': @@ -49,14 +49,14 @@ class CTVNewsIE(InfoExtractor): return { '_type': 'url_transparent', 'id': clip_id, - 'url': '9c9media:ctvnews_web:%s' % clip_id, + 'url': f'9c9media:ctvnews_web:{clip_id}', 'ie_key': 'NineCNineMedia', } if page_id.isdigit(): return ninecninemedia_url_result(page_id) else: - webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={ + webpage = self._download_webpage(f'http://www.ctvnews.ca/{page_id}', page_id, query={ 'ot': 'example.AjaxPageLayout.ot', 'maxItemsPerPage': 1000000, }) diff --git a/yt_dlp/extractor/cultureunplugged.py 
b/yt_dlp/extractor/cultureunplugged.py index 9c8509f..8e6579c 100644 --- a/yt_dlp/extractor/cultureunplugged.py +++ b/yt_dlp/extractor/cultureunplugged.py @@ -20,7 +20,7 @@ class CultureUnpluggedIE(InfoExtractor): 'creator': 'Coldstream Creative', 'duration': 2203, 'view_count': int, - } + }, }, { 'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662', 'only_matching': True, @@ -35,7 +35,7 @@ class CultureUnpluggedIE(InfoExtractor): self._request_webpage(HEADRequest( 'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % -(time.timezone / 3600)), display_id) movie_data = self._download_json( - 'http://www.cultureunplugged.com/movie-data/cu-%s.json' % video_id, display_id) + f'http://www.cultureunplugged.com/movie-data/cu-{video_id}.json', display_id) video_url = movie_data['url'] title = movie_data['title'] @@ -46,11 +46,11 @@ class CultureUnpluggedIE(InfoExtractor): view_count = int_or_none(movie_data.get('views')) thumbnails = [{ - 'url': movie_data['%s_thumb' % size], + 'url': movie_data[f'{size}_thumb'], 'id': size, 'preference': preference, } for preference, size in enumerate(( - 'small', 'large')) if movie_data.get('%s_thumb' % size)] + 'small', 'large')) if movie_data.get(f'{size}_thumb')] return { 'id': video_id, diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 941cf4e..f5a2c3c 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -2,7 +2,6 @@ import re import urllib.parse from .common import InfoExtractor -from ..compat import compat_str from ..utils import ExtractorError, int_or_none, urlencode_postdata @@ -16,7 +15,7 @@ class CuriosityStreamBaseIE(InfoExtractor): if isinstance(error, dict): error = ', '.join(error.values()) raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error), expected=True) + f'{self.IE_NAME} said: {error}', expected=True) def _call_api(self, path, video_id, query=None): headers = {} @@ -59,7 +58,7 @@ class 
CuriosityStreamIE(CuriosityStreamBaseIE): 'series_id': '2', 'thumbnail': r're:https://img.curiositystream.com/.+\.jpg', 'tags': [], - 'duration': 158 + 'duration': 158, }, 'params': { # m3u8 download @@ -157,10 +156,10 @@ class CuriosityStreamCollectionBaseIE(CuriosityStreamBaseIE): collection = self._call_api(collection_id, collection_id) entries = [] for media in collection.get('media', []): - media_id = compat_str(media.get('id')) + media_id = str(media.get('id')) media_type, ie = ('series', CuriosityStreamSeriesIE) if media.get('is_collection') else ('video', CuriosityStreamIE) entries.append(self.url_result( - 'https://curiositystream.com/%s/%s' % (media_type, media_id), + f'https://curiositystream.com/{media_type}/{media_id}', ie=ie.ie_key(), video_id=media_id)) return self.playlist_result( entries, collection_id, diff --git a/yt_dlp/extractor/cwtv.py b/yt_dlp/extractor/cwtv.py index 69d50da..870d4f3 100644 --- a/yt_dlp/extractor/cwtv.py +++ b/yt_dlp/extractor/cwtv.py @@ -75,7 +75,7 @@ class CWTVIE(InfoExtractor): raise ExtractorError(data['msg'], expected=True) video_data = data['video'] title = video_data['title'] - mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id + mpx_url = video_data.get('mpx_url') or f'http://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}?formats=M3U' season = str_or_none(video_data.get('season')) episode = str_or_none(video_data.get('episode')) @@ -95,5 +95,5 @@ class CWTVIE(InfoExtractor): 'timestamp': parse_iso8601(video_data.get('start_time')), 'age_limit': parse_age_limit(video_data.get('rating')), 'ie_key': 'ThePlatform', - 'thumbnail': video_data.get('large_thumbnail') + 'thumbnail': video_data.get('large_thumbnail'), } diff --git a/yt_dlp/extractor/cybrary.py b/yt_dlp/extractor/cybrary.py index c6995b2..59c8ab4 100644 --- a/yt_dlp/extractor/cybrary.py +++ b/yt_dlp/extractor/cybrary.py @@ -61,9 +61,9 @@ class CybraryIE(CybraryBaseIE): 
'series': 'Cybrary Orientation', 'uploader': 'Cybrary', 'chapter': 'Cybrary Orientation Series', - 'chapter_id': '63110' + 'chapter_id': '63110', }, - 'expected_warnings': ['No authenticators for vimeo'] + 'expected_warnings': ['No authenticators for vimeo'], }, { 'url': 'https://app.cybrary.it/immersive/12747143/activity/52686', 'md5': '62f26547dccc59c44363e2a13d4ad08d', @@ -79,9 +79,9 @@ class CybraryIE(CybraryBaseIE): 'series': 'AZ-500: Microsoft Azure Security Technologies', 'uploader': 'Cybrary', 'chapter': 'Implement Network Security', - 'chapter_id': '52693' + 'chapter_id': '52693', }, - 'expected_warnings': ['No authenticators for vimeo'] + 'expected_warnings': ['No authenticators for vimeo'], }] def _real_extract(self, url): @@ -93,7 +93,7 @@ class CybraryIE(CybraryBaseIE): raise ExtractorError('The activity is not a video', expected=True) module = next((m for m in course.get('learning_modules') or [] - if int(activity_id) in traverse_obj(m, ('activities', ..., 'id') or [])), None) + if int(activity_id) in traverse_obj(m, ('activities', ..., 'id'))), None) vimeo_id = self._get_vimeo_id(activity_id) @@ -105,7 +105,7 @@ class CybraryIE(CybraryBaseIE): 'chapter': module.get('title'), 'chapter_id': str_or_none(module.get('id')), 'title': activity.get('title'), - 'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'referer': 'https://api.cybrary.it'}) + 'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'referer': 'https://api.cybrary.it'}), } @@ -116,17 +116,17 @@ class CybraryCourseIE(CybraryBaseIE): 'info_dict': { 'id': '898', 'title': 'AZ-500: Microsoft Azure Security Technologies', - 'description': 'md5:69549d379c0fc1dec92926d4e8b6fbd4' + 'description': 'md5:69549d379c0fc1dec92926d4e8b6fbd4', }, - 'playlist_count': 59 + 'playlist_count': 59, }, { 'url': 'https://app.cybrary.it/browse/course/cybrary-orientation', 'info_dict': { 'id': '1245', 'title': 'Cybrary Orientation', - 'description': 'md5:9e69ff66b32fe78744e0ad4babe2e88e' 
+ 'description': 'md5:9e69ff66b32fe78744e0ad4babe2e88e', }, - 'playlist_count': 4 + 'playlist_count': 4, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py index 4c25bea..540676a 100644 --- a/yt_dlp/extractor/dailymail.py +++ b/yt_dlp/extractor/dailymail.py @@ -1,8 +1,8 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_protocol, int_or_none, + join_nonempty, try_get, unescapeHTML, ) @@ -19,7 +19,7 @@ class DailyMailIE(InfoExtractor): 'ext': 'mp4', 'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'', 'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84', - } + }, }, { 'url': 'http://www.dailymail.co.uk/embed/video/1295863.html', 'only_matching': True, @@ -35,8 +35,8 @@ class DailyMailIE(InfoExtractor): sources_url = (try_get( video_data, (lambda x: x['plugins']['sources']['url'], - lambda x: x['sources']['url']), compat_str) - or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id) + lambda x: x['sources']['url']), str) + or f'http://www.dailymail.co.uk/api/player/{video_id}/video-sources.json') video_sources = self._download_json(sources_url, video_id) body = video_sources.get('body') @@ -53,7 +53,7 @@ class DailyMailIE(InfoExtractor): is_hls = container == 'M2TS' protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url}) formats.append({ - 'format_id': ('hls' if is_hls else protocol) + ('-%d' % tbr if tbr else ''), + 'format_id': join_nonempty('hls' if is_hls else protocol, tbr), 'url': rendition_url, 'width': int_or_none(rendition.get('frameWidth')), 'height': int_or_none(rendition.get('frameHeight')), diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py index c570a4f..632335e 100644 --- a/yt_dlp/extractor/dailymotion.py +++ b/yt_dlp/extractor/dailymotion.py @@ -87,7 +87,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): %s(xid: "%s"%s) { %s } -}''' % 
(object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields), +}''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields), # noqa: UP031 }).encode(), headers=self._HEADERS) obj = resp['data'][object_type] if not obj: @@ -143,7 +143,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'like_count': int, 'tags': ['en_quete_d_esprit'], 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080', - } + }, }, { 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', 'md5': '2137c41a8e78554bb09225b8eb322406', @@ -260,8 +260,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): %s audienceCount isOnAir - }''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata', - 'password: "%s"' % self.get_param('videopassword') if password else None) + }''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata', # noqa: UP031 + 'password: "{}"'.format(self.get_param('videopassword')) if password else None) xid = media['xid'] metadata = self._download_json( @@ -277,7 +277,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list) self.raise_geo_restricted(msg=title, countries=allowed_countries) raise ExtractorError( - '%s said: %s' % (self.IE_NAME, title), expected=True) + f'{self.IE_NAME} said: {title}', expected=True) title = metadata['title'] is_live = media.get('isOnAir') @@ -363,7 +363,7 @@ class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor): } } }''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page), - 'Downloading page %d' % page)['videos'] + f'Downloading page {page}')['videos'] for edge in videos['edges']: node = edge['node'] yield self.url_result( @@ -396,7 +396,7 @@ class DailymotionPlaylistIE(DailymotionPlaylistBaseIE): 
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage): for p in re.findall(r'list\[\]=/playlist/([^/]+)/', unescapeHTML(mobj.group('url'))): - yield '//dailymotion.com/playlist/%s' % p + yield f'//dailymotion.com/playlist/{p}' class DailymotionSearchIE(DailymotionPlaylistBaseIE): @@ -424,7 +424,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE): 'limit': 20, 'page': page, 'query': term, - } + }, }).encode(), headers=self._HEADERS) obj = traverse_obj(resp, ('data', 'search', {dict})) if not obj: diff --git a/yt_dlp/extractor/dailywire.py b/yt_dlp/extractor/dailywire.py index f177c9d..3d5bb25 100644 --- a/yt_dlp/extractor/dailywire.py +++ b/yt_dlp/extractor/dailywire.py @@ -35,7 +35,7 @@ class DailyWireIE(DailyWireBaseIE): 'creator': 'Caroline Roberts', 'series_id': 'ckzplm0a097fn0826r2vc3j7h', 'series': 'China: The Enemy Within', - } + }, }, { 'url': 'https://www.dailywire.com/episode/ep-124-bill-maher', 'info_dict': { @@ -48,7 +48,7 @@ class DailyWireIE(DailyWireBaseIE): 'description': 'md5:adb0de584bcfa9c41374999d9e324e98', 'series_id': 'cjzvep7270hp00786l9hwccob', 'series': 'The Sunday Special', - } + }, }, { 'url': 'https://www.dailywire.com/videos/the-hyperions', 'only_matching': True, @@ -95,7 +95,7 @@ class DailyWirePodcastIE(DailyWireBaseIE): 'description': 'md5:c4afbadda4e1c38a4496f6d62be55634', 'thumbnail': 'https://daily-wire-production.imgix.net/podcasts/ckx4otgd71jm508699tzb6hf4-1639506575562.jpg', 'duration': 900.117667, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py index 2e0f6f0..9ac0b6f 100644 --- a/yt_dlp/extractor/damtomo.py +++ b/yt_dlp/extractor/damtomo.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate @@ -32,7 +31,7 @@ class DamtomoBaseIE(InfoExtractor): # and never likely to happen 
in the future transform_source=lambda x: re.sub(r'\s*encoding="[^"]+?"', '', x)) m3u8_url = try_get(stream_tree, lambda x: x.find( - './/d:streamingUrl', {'d': self._DKML_XML_NS}).text.strip(), compat_str) + './/d:streamingUrl', {'d': self._DKML_XML_NS}).text.strip(), str) if not m3u8_url: raise ExtractorError('Failed to obtain m3u8 URL') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') @@ -68,7 +67,7 @@ class DamtomoVideoIE(DamtomoBaseIE): 'track': 'Get Wild', 'artist': 'TM NETWORK(TMN)', 'upload_date': '20201226', - } + }, }] @@ -90,7 +89,7 @@ class DamtomoRecordIE(DamtomoBaseIE): 'like_count': 1, 'track': 'イカSUMMER [良音]', 'artist': 'ORANGE RANGE', - } + }, }, { 'url': 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=27489418', 'info_dict': { @@ -104,5 +103,5 @@ class DamtomoRecordIE(DamtomoBaseIE): 'like_count': 3, 'track': '心みだれて〜say it with flowers〜(生音)', 'artist': '小林明子', - } + }, }] diff --git a/yt_dlp/extractor/daum.py b/yt_dlp/extractor/daum.py index 24c5208..ee84449 100644 --- a/yt_dlp/extractor/daum.py +++ b/yt_dlp/extractor/daum.py @@ -1,9 +1,7 @@ import itertools +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, -) from ..utils import parse_qs @@ -83,7 +81,7 @@ class DaumIE(DaumBaseIE): }] def _real_extract(self, url): - video_id = compat_urllib_parse_unquote(self._match_id(url)) + video_id = urllib.parse.unquote(self._match_id(url)) if not video_id.isdigit(): video_id += '@my' return self.url_result( @@ -117,7 +115,7 @@ class DaumClipIE(DaumBaseIE): @classmethod def suitable(cls, url): - return False if DaumPlaylistIE.suitable(url) or DaumUserIE.suitable(url) else super(DaumClipIE, cls).suitable(url) + return False if DaumPlaylistIE.suitable(url) or DaumUserIE.suitable(url) else super().suitable(url) def _real_extract(self, url): video_id = self._match_id(url) @@ -131,12 +129,12 @@ class DaumListIE(InfoExtractor): # XXX: Conventionally, 
base classes should end entries = [] for pagenum in itertools.count(1): list_info = self._download_json( - 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % ( - pagenum, list_id_type, list_id), list_id, 'Downloading list info - %s' % pagenum) + f'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page={pagenum}&{list_id_type}={list_id}', + list_id, f'Downloading list info - {pagenum}') entries.extend([ self.url_result( - 'http://tvpot.daum.net/v/%s' % clip['vid']) + 'http://tvpot.daum.net/v/{}'.format(clip['vid'])) for clip in list_info['clip_list'] ]) @@ -169,7 +167,7 @@ class DaumPlaylistIE(DaumListIE): 'id': '6213966', 'title': 'Woorissica Official', }, - 'playlist_mincount': 181 + 'playlist_mincount': 181, }, { 'note': 'Playlist url with clipid - noplaylist', 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844', @@ -182,12 +180,12 @@ class DaumPlaylistIE(DaumListIE): 'params': { 'noplaylist': True, 'skip_download': True, - } + }, }] @classmethod def suitable(cls, url): - return False if DaumUserIE.suitable(url) else super(DaumPlaylistIE, cls).suitable(url) + return False if DaumUserIE.suitable(url) else super().suitable(url) def _real_extract(self, url): list_id = self._match_id(url) @@ -211,7 +209,7 @@ class DaumUserIE(DaumListIE): 'id': 'o2scDLIVbHc0', 'title': '마이 리틀 텔레비전', }, - 'playlist_mincount': 213 + 'playlist_mincount': 213, }, { 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156', 'info_dict': { @@ -219,12 +217,12 @@ class DaumUserIE(DaumListIE): 'ext': 'mp4', 'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116', 'upload_date': '20160117', - 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36' + 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36', }, 'params': { 'noplaylist': True, 'skip_download': True, - } + }, }, { 'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence', 'url': 
'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631', @@ -232,7 +230,7 @@ class DaumUserIE(DaumListIE): 'id': '6196631', 'title': '마이 리틀 텔레비전 - 20160109', }, - 'playlist_count': 11 + 'playlist_count': 11, }, { 'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0', 'only_matching': True, diff --git a/yt_dlp/extractor/dbtv.py b/yt_dlp/extractor/dbtv.py index 18be46f..795fbac 100644 --- a/yt_dlp/extractor/dbtv.py +++ b/yt_dlp/extractor/dbtv.py @@ -18,7 +18,7 @@ class DBTVIE(InfoExtractor): 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ', 'uploader': 'Dagbladet', }, - 'add_ie': ['Youtube'] + 'add_ie': ['Youtube'], }, { 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false', 'only_matching': True, diff --git a/yt_dlp/extractor/dctp.py b/yt_dlp/extractor/dctp.py index 24bb6ac..09bdbf2 100644 --- a/yt_dlp/extractor/dctp.py +++ b/yt_dlp/extractor/dctp.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( float_or_none, int_or_none, @@ -37,18 +36,18 @@ class DctpTvIE(InfoExtractor): display_id = self._match_id(url) version = self._download_json( - '%s/version.json' % self._BASE_URL, display_id, + f'{self._BASE_URL}/version.json', display_id, 'Downloading version JSON') - restapi_base = '%s/%s/restapi' % ( + restapi_base = '{}/{}/restapi'.format( self._BASE_URL, version['version_name']) info = self._download_json( - '%s/slugs/%s.json' % (restapi_base, display_id), display_id, + f'{restapi_base}/slugs/{display_id}.json', display_id, 'Downloading video info JSON') media = self._download_json( - '%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])), + '{}/media/{}.json'.format(restapi_base, str(info['object_id'])), display_id, 'Downloading media JSON') uuid = media['uuid'] @@ -57,7 +56,7 @@ class DctpTvIE(InfoExtractor): formats = [] def add_formats(suffix): - templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix) + templ = f'https://%s/{uuid}_dctp_{suffix}.m4v' 
formats.extend([{ 'format_id': 'hls-' + suffix, 'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8', diff --git a/yt_dlp/extractor/deezer.py b/yt_dlp/extractor/deezer.py index f61f12a..2ca8be5 100644 --- a/yt_dlp/extractor/deezer.py +++ b/yt_dlp/extractor/deezer.py @@ -22,7 +22,7 @@ class DeezerBaseInfoExtractor(InfoExtractor): default=None) if geoblocking_msg is not None: raise ExtractorError( - 'Deezer said: %s' % geoblocking_msg, expected=True) + f'Deezer said: {geoblocking_msg}', expected=True) data_json = self._search_regex( (r'__DZR_APP_STATE__\s*=\s*({.+?})\s*</script>', @@ -67,7 +67,7 @@ class DeezerPlaylistIE(DeezerBaseInfoExtractor): entries.append({ 'id': s.get('SNG_ID'), 'duration': int_or_none(s.get('DURATION')), - 'title': '%s - %s' % (artists, s.get('SNG_TITLE')), + 'title': '{} - {}'.format(artists, s.get('SNG_TITLE')), 'uploader': s.get('ART_NAME'), 'uploader_id': s.get('ART_ID'), 'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0, @@ -119,7 +119,7 @@ class DeezerAlbumIE(DeezerBaseInfoExtractor): entries.append({ 'id': s.get('SNG_ID'), 'duration': int_or_none(s.get('DURATION')), - 'title': '%s - %s' % (artists, s.get('SNG_TITLE')), + 'title': '{} - {}'.format(artists, s.get('SNG_TITLE')), 'uploader': s.get('ART_NAME'), 'uploader_id': s.get('ART_ID'), 'age_limit': 16 if s.get('EXPLICIT_LYRICS') == '1' else 0, diff --git a/yt_dlp/extractor/democracynow.py b/yt_dlp/extractor/democracynow.py index 1774249..80c56b4 100644 --- a/yt_dlp/extractor/democracynow.py +++ b/yt_dlp/extractor/democracynow.py @@ -1,8 +1,8 @@ import os.path import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( remove_start, url_basename, @@ -52,7 +52,7 @@ class DemocracynowIE(InfoExtractor): media_url = json_data.get(key, '') if not media_url: continue - media_url = re.sub(r'\?.*', '', compat_urlparse.urljoin(url, media_url)) + media_url = re.sub(r'\?.*', '', urllib.parse.urljoin(url, media_url)) 
video_id = video_id or remove_start(os.path.splitext(url_basename(media_url))[0], 'dn') formats.append({ 'url': media_url, @@ -70,13 +70,13 @@ class DemocracynowIE(InfoExtractor): # chapter_file are not subtitles if 'caption_file' in json_data: add_subtitle_item(default_lang, { - 'url': compat_urlparse.urljoin(url, json_data['caption_file']), + 'url': urllib.parse.urljoin(url, json_data['caption_file']), }) for subtitle_item in json_data.get('captions', []): lang = subtitle_item.get('language', '').lower() or default_lang add_subtitle_item(lang, { - 'url': compat_urlparse.urljoin(url, subtitle_item['url']), + 'url': urllib.parse.urljoin(url, subtitle_item['url']), }) description = self._og_search_description(webpage, default=None) diff --git a/yt_dlp/extractor/detik.py b/yt_dlp/extractor/detik.py index f148054..5097759 100644 --- a/yt_dlp/extractor/detik.py +++ b/yt_dlp/extractor/detik.py @@ -17,8 +17,8 @@ class DetikEmbedIE(InfoExtractor): 'tags': ['raja charles', ' raja charles iii', ' ratu elizabeth', ' ratu elizabeth meninggal dunia', ' raja inggris', ' inggris'], 'release_timestamp': 1662869995, 'release_date': '20220911', - 'uploader': 'REUTERS' - } + 'uploader': 'REUTERS', + }, }, { # 20.detik 'url': 'https://20.detik.com/otobuzz/20220704-220704093/mulai-rp-10-jutaan-ini-skema-kredit-mitsubishi-pajero-sport', @@ -36,8 +36,8 @@ class DetikEmbedIE(InfoExtractor): 'release_timestamp': 1656926321, 'release_date': '20220704', 'age_limit': 0, - 'uploader': 'Ridwan Arifin ' # TODO: strip trailling whitespace at uploader - } + 'uploader': 'Ridwan Arifin ', # TODO: strip trailling whitespace at uploader + }, }, { # pasangmata.detik 'url': 'https://pasangmata.detik.com/contribution/366649', @@ -49,7 +49,7 @@ class DetikEmbedIE(InfoExtractor): 'age_limit': 0, 'tags': 'count:17', 'thumbnail': 'https://akcdn.detik.net.id/community/data/media/thumbs-pasangmata/2022/09/08/366649-16626229351533009620.mp4-03.jpg', - } + }, }, { # insertlive embed 'url': 
'https://www.insertlive.com/embed/video/290482', @@ -64,7 +64,7 @@ class DetikEmbedIE(InfoExtractor): 'title': 'Diincar Leonardo DiCaprio, Gigi Hadid Ngaku Tertarik Tapi Belum Cinta', 'tags': ['leonardo dicaprio', ' gigi hadid', ' hollywood'], 'uploader': '!nsertlive', - } + }, }, { # beautynesia embed 'url': 'https://www.beautynesia.id/embed/video/261636', @@ -79,7 +79,7 @@ class DetikEmbedIE(InfoExtractor): 'tags': ['zodiac update', ' zodiak', ' ramalan bintang', ' zodiak beruntung 2022', ' zodiak hoki september 2022', ' zodiak beruntung september 2022'], 'thumbnail': 'https://akcdn.detik.net.id/visual/2022/09/05/3-zodiak-paling-beruntung-selama-september-2022_169.jpeg?w=600&q=90', 'uploader': 'amh', - } + }, }, { # cnbcindonesia embed 'url': 'https://www.cnbcindonesia.com/embed/video/371839', @@ -91,7 +91,7 @@ class DetikEmbedIE(InfoExtractor): 'age_limit': 0, 'thumbnail': 'https://awsimages.detik.net.id/visual/2022/09/13/cnbc-indonesia-tv-3_169.png?w=600&q=80', 'description': 'md5:8b9111e37555fcd95fe549a9b4ae6fdc', - } + }, }, { # detik shortlink (we can get it from https://dtk.id/?<url>) 'url': 'https://dtk.id/NkISKr', @@ -110,7 +110,7 @@ class DetikEmbedIE(InfoExtractor): 'timestamp': 1663139688, 'duration': 213.0, 'tags': ['hacker bjorka', 'bjorka', 'hacker bjorka bocorkan data rahasia presiden jokowi', 'jokowi'], - } + }, }] def _extract_from_webpage(self, url, webpage): @@ -142,7 +142,7 @@ class DetikEmbedIE(InfoExtractor): 'timestamp': int_or_none(self._html_search_meta('dtk:createdateunix', webpage, fatal=False, default=None), 1000), 'uploader': self._search_regex( r'([^-]+)', self._html_search_meta('dtk:author', webpage, default='').strip(), 'uploader', - default=None) + default=None), } formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, display_id) diff --git a/yt_dlp/extractor/deuxm.py b/yt_dlp/extractor/deuxm.py index 74a6da6..c8ce32c 100644 --- a/yt_dlp/extractor/deuxm.py +++ b/yt_dlp/extractor/deuxm.py @@ -12,8 +12,8 @@ class 
DeuxMIE(InfoExtractor): 'id': '6351d439b15e1a613b3debe8', 'ext': 'mp4', 'title': 'Grand Angle : Jeudi 20 Octobre 2022', - 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$' - } + 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$', + }, }, { 'url': 'https://2m.ma/fr/replay/single/635c0aeab4eec832622356da', 'md5': 'ad6af2f5e4d5b2ad2194a84b6e890b4c', @@ -21,8 +21,8 @@ class DeuxMIE(InfoExtractor): 'id': '635c0aeab4eec832622356da', 'ext': 'mp4', 'title': 'Journal Amazigh : Vendredi 28 Octobre 2022', - 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$' - } + 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$', + }, }] def _real_extract(self, url): @@ -49,8 +49,8 @@ class DeuxMNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Kan Ya Mkan d\u00e9poussi\u00e8re l\u2019histoire du phare du Cap Beddouza', 'description': 'md5:99dcf29b82f1d7f2a4acafed1d487527', - 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$' - } + 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$', + }, }, { 'url': 'https://2m.ma/fr/news/Interview-Casablanca-hors-des-sentiers-battus-avec-Abderrahim-KASSOU-Replay--20221017', 'md5': '7aca29f02230945ef635eb8290283c0c', @@ -59,8 +59,8 @@ class DeuxMNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'Interview: Casablanca hors des sentiers battus avec Abderrahim KASSOU (Replay) ', 'description': 'md5:3b8e78111de9fcc6ef7f7dd6cff2430c', - 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$' - } + 'thumbnail': r're:^https?://2msoread-ww.amagi.tv/mediasfiles/videos/images/.*\.png$', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/dfb.py b/yt_dlp/extractor/dfb.py index c4fb5c2..b397ed9 100644 --- a/yt_dlp/extractor/dfb.py +++ b/yt_dlp/extractor/dfb.py @@ -22,7 +22,7 @@ class DFBIE(InfoExtractor): display_id, video_id = 
self._match_valid_url(url).groups() player_info = self._download_xml( - 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id, + f'http://tv.dfb.de/server/hd_video.php?play={video_id}', display_id) video_info = player_info.find('video') stream_access_url = self._proto_relative_url(video_info.find('url').text.strip()) @@ -46,7 +46,7 @@ class DFBIE(InfoExtractor): 'id': video_id, 'display_id': display_id, 'title': video_info.find('title').text, - 'thumbnail': 'http://tv.dfb.de/images/%s_640x360.jpg' % video_id, + 'thumbnail': f'http://tv.dfb.de/images/{video_id}_640x360.jpg', 'upload_date': unified_strdate(video_info.find('time_date').text), 'formats': formats, } diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index 4380c41..8b4d5c0 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -1,16 +1,16 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, - parse_resolution, - traverse_obj, try_get, + url_or_none, urlencode_postdata, ) +from ..utils.traversal import traverse_obj class DigitalConcertHallIE(InfoExtractor): IE_DESC = 'DigitalConcertHall extractor' - _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?' 
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token' _ACCESS_TOKEN = None _NETRC_MACHINE = 'digitalconcerthall' @@ -26,7 +26,8 @@ class DigitalConcertHallIE(InfoExtractor): 'upload_date': '20210624', 'timestamp': 1624548600, 'duration': 2798, - 'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler', + 'album_artists': ['Members of the Berliner Philharmoniker', 'Simon Rössler'], + 'composers': ['Kurt Weill'], }, 'params': {'skip_download': 'm3u8'}, }, { @@ -34,8 +35,9 @@ class DigitalConcertHallIE(InfoExtractor): 'url': 'https://www.digitalconcerthall.com/en/concert/53785', 'info_dict': { 'id': '53785', - 'album_artist': 'Berliner Philharmoniker / Kirill Petrenko', + 'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'], 'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich', + 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$', }, 'params': {'skip_download': 'm3u8'}, 'playlist_count': 3, @@ -49,9 +51,20 @@ class DigitalConcertHallIE(InfoExtractor): 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$', 'upload_date': '20220714', 'timestamp': 1657785600, - 'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff', + 'album_artists': ['Frank Peter Zimmermann', 'Benedikt von Bernstorff', 'Jakob von Bernstorff'], }, 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'Concert with several works and an interview', + 'url': 'https://www.digitalconcerthall.com/en/work/53785-1', + 'info_dict': { + 'id': '53785', + 'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'], + 'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich', + 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$', + }, + 'params': {'skip_download': 'm3u8'}, + 'playlist_count': 1, }] def _perform_login(self, username, password): @@ -78,7 +91,7 @@ class DigitalConcertHallIE(InfoExtractor): }), headers={ 'Content-Type': 
'application/x-www-form-urlencoded', 'Referer': 'https://www.digitalconcerthall.com', - 'Authorization': f'Bearer {self._ACCESS_TOKEN}' + 'Authorization': f'Bearer {self._ACCESS_TOKEN}', }) except ExtractorError: self.raise_login_required(msg='Login info incorrect') @@ -94,18 +107,17 @@ class DigitalConcertHallIE(InfoExtractor): self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={ 'Accept': 'application/json', 'Authorization': f'Bearer {self._ACCESS_TOKEN}', - 'Accept-Language': language + 'Accept-Language': language, }) - m3u8_url = traverse_obj( - stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False) - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False) + formats = [] + for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): + formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False)) yield { 'id': video_id, 'title': item.get('title'), 'composer': item.get('name_composer'), - 'url': m3u8_url, 'formats': formats, 'duration': item.get('duration_total'), 'timestamp': traverse_obj(item, ('date', 'published')), @@ -119,31 +131,32 @@ class DigitalConcertHallIE(InfoExtractor): } def _real_extract(self, url): - language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id') + language, type_, video_id, part = self._match_valid_url(url).group('language', 'type', 'id', 'part') if not language: language = 'en' - thumbnail_url = self._html_search_regex( - r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)', - self._download_webpage(url, video_id), 'thumbnail') - thumbnails = [{ - 'url': thumbnail_url, - **parse_resolution(thumbnail_url) - }] - + api_type = 'concert' if type_ == 'work' else type_ vid_info = self._download_json( - f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={ + 
f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={ 'Accept': 'application/json', - 'Accept-Language': language + 'Accept-Language': language, }) - album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '') + album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...)) + if type_ == 'work': + videos = [videos[int(part) - 1]] + + thumbnail = traverse_obj(vid_info, ( + 'image', ..., {self._proto_relative_url}, {url_or_none}, + {lambda x: x.format(width=0, height=0)}, any)) # NB: 0x0 is the original size + return { '_type': 'playlist', 'id': video_id, 'title': vid_info.get('title'), - 'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_), - 'thumbnails': thumbnails, - 'album_artist': album_artist, + 'entries': self._entries( + videos, language, type_, thumbnail=thumbnail, album_artists=album_artists), + 'thumbnail': thumbnail, + 'album_artists': album_artists, } diff --git a/yt_dlp/extractor/digiteka.py b/yt_dlp/extractor/digiteka.py index 912e33b..e56ec63 100644 --- a/yt_dlp/extractor/digiteka.py +++ b/yt_dlp/extractor/digiteka.py @@ -65,7 +65,7 @@ class DigitekaIE(InfoExtractor): video_type = 'musique' deliver_info = self._download_json( - 'http://www.ultimedia.com/deliver/video?video=%s&topic=%s' % (video_id, video_type), + f'http://www.ultimedia.com/deliver/video?video={video_id}&topic={video_type}', video_id) yt_id = deliver_info.get('yt_id') diff --git a/yt_dlp/extractor/discovery.py b/yt_dlp/extractor/discovery.py index 75b4643..b98279d 100644 --- a/yt_dlp/extractor/discovery.py +++ b/yt_dlp/extractor/discovery.py @@ -1,8 +1,8 @@ import random import string +import urllib.parse from .discoverygo import DiscoveryGoBaseIE -from ..compat import compat_urllib_parse_unquote from ..networking.exceptions import HTTPError from ..utils import 
ExtractorError @@ -42,7 +42,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): }, 'params': { 'skip_download': True, # requires ffmpeg - } + }, }, { 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision', 'only_matching': True, @@ -67,14 +67,14 @@ class DiscoveryIE(DiscoveryGoBaseIE): # prefer Affiliate Auth Token over Anonymous Auth Token auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn') if auth_storage_cookie and auth_storage_cookie.value: - auth_storage = self._parse_json(compat_urllib_parse_unquote( - compat_urllib_parse_unquote(auth_storage_cookie.value)), + auth_storage = self._parse_json(urllib.parse.unquote( + urllib.parse.unquote(auth_storage_cookie.value)), display_id, fatal=False) or {} access_token = auth_storage.get('a') or auth_storage.get('access_token') if not access_token: access_token = self._download_json( - 'https://%s.com/anonymous' % site, display_id, + f'https://{site}.com/anonymous', display_id, 'Downloading token JSON metadata', query={ 'authRel': 'authorization', 'client_id': '3020a40c2356a645b4b4', diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py index b2663a6..9649485 100644 --- a/yt_dlp/extractor/discoverygo.py +++ b/yt_dlp/extractor/discoverygo.py @@ -40,7 +40,7 @@ class DiscoveryGoBaseIE(InfoExtractor): formats = [] for stream_kind in ('', 'hds'): suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX - stream_url = stream.get('%s%s' % (stream_kind, suffix)) + stream_url = stream.get(f'{stream_kind}{suffix}') if not stream_url: continue if stream_kind == '': @@ -143,8 +143,7 @@ class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): @classmethod def suitable(cls, url): - return False if DiscoveryGoIE.suitable(url) else super( - DiscoveryGoPlaylistIE, cls).suitable(url) + return False if DiscoveryGoIE.suitable(url) else super().suitable(url) def _real_extract(self, url): display_id = self._match_id(url) diff --git 
a/yt_dlp/extractor/disney.py b/yt_dlp/extractor/disney.py index d8dde0c..a90f123 100644 --- a/yt_dlp/extractor/disney.py +++ b/yt_dlp/extractor/disney.py @@ -26,7 +26,7 @@ class DisneyIE(InfoExtractor): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # Grill.burger 'url': 'http://www.starwars.com/video/rogue-one-a-star-wars-story-intro-featurette', @@ -40,7 +40,7 @@ class DisneyIE(InfoExtractor): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'http://videos.disneylatino.com/ver/spider-man-de-regreso-a-casa-primer-adelanto-543a33a1850bdcfcca13bae2', 'only_matching': True, @@ -84,7 +84,7 @@ class DisneyIE(InfoExtractor): video_data = page_data['data'][0] else: webpage = self._download_webpage( - 'http://%s/embed/%s' % (domain, video_id), video_id) + f'http://{domain}/embed/{video_id}', video_id) page_data = self._parse_json(self._search_regex( r'Disney\.EmbedVideo\s*=\s*({.+});', webpage, 'embed data'), video_id) @@ -132,7 +132,7 @@ class DisneyIE(InfoExtractor): }) if not formats and video_data.get('expired'): self.raise_no_formats( - '%s said: %s' % (self.IE_NAME, page_data['translations']['video_expired']), + '{} said: {}'.format(self.IE_NAME, page_data['translations']['video_expired']), expected=True) subtitles = {} diff --git a/yt_dlp/extractor/dispeak.py b/yt_dlp/extractor/dispeak.py index 37f89b9..89c27e0 100644 --- a/yt_dlp/extractor/dispeak.py +++ b/yt_dlp/extractor/dispeak.py @@ -55,7 +55,7 @@ class DigitallySpeakingIE(InfoExtractor): if video_root is None: http_host = xpath_text(metadata, 'httpHost', default=None) if http_host: - video_root = 'http://%s/' % http_host + video_root = f'http://{http_host}/' if video_root is None: # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js # Works for GPUTechConf, too @@ -86,7 +86,7 @@ class DigitallySpeakingIE(InfoExtractor): audios = metadata.findall('./audios/audio') for audio in audios: formats.append({ - 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 
akamai_url, + 'url': f'rtmp://{akamai_url}/ondemand?ovpfv=1.1', 'play_path': remove_end(audio.get('url'), '.flv'), 'ext': 'flv', 'vcodec': 'none', @@ -95,14 +95,14 @@ class DigitallySpeakingIE(InfoExtractor): }) for video_key, format_id, preference in ( ('slide', 'slides', -2), ('speaker', 'speaker', -1)): - video_path = xpath_text(metadata, './%sVideo' % video_key) + video_path = xpath_text(metadata, f'./{video_key}Video') if not video_path: continue formats.append({ - 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, + 'url': f'rtmp://{akamai_url}/ondemand?ovpfv=1.1', 'play_path': remove_end(video_path, '.flv'), 'ext': 'flv', - 'format_note': '%s video' % video_key, + 'format_note': f'{video_key} video', 'quality': preference, 'format_id': format_id, }) diff --git a/yt_dlp/extractor/dlf.py b/yt_dlp/extractor/dlf.py index 88a4149..eac2190 100644 --- a/yt_dlp/extractor/dlf.py +++ b/yt_dlp/extractor/dlf.py @@ -37,7 +37,7 @@ class DLFBaseIE(InfoExtractor): 'webpage_url': ('data-audio-download-tracking-path', {url_or_none}), }, get_all=False), 'formats': (self._extract_m3u8_formats(url, audio_id, fatal=False) - if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}]) + if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}]), } @@ -56,12 +56,12 @@ class DLFIE(DLFBaseIE): 'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673', 'uploader': 'Deutschlandfunk', 'series': 'On Stage', - 'channel': 'deutschlandfunk' + 'channel': 'deutschlandfunk', }, 'params': { - 'skip_download': 'm3u8' + 'skip_download': 'm3u8', }, - 'skip': 'This webpage no longer exists' + 'skip': 'This webpage no longer exists', }, { 'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html', 'info_dict': { @@ -72,8 +72,8 @@ class DLFIE(DLFBaseIE): 'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673', 
'uploader': 'Deutschlandfunk', 'series': 'Kommentare und Themen der Woche', - 'channel': 'deutschlandfunk' - } + 'channel': 'deutschlandfunk', + }, }, ] @@ -96,7 +96,7 @@ class DLFCorpusIE(DLFBaseIE): 'info_dict': { 'id': 'fechten-russland-belarus-ukraine-protest-100', 'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet', - 'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad' + 'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad', }, 'playlist_mincount': 5, 'playlist': [{ @@ -108,8 +108,8 @@ class DLFCorpusIE(DLFBaseIE): 'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313', 'uploader': 'Deutschlandfunk', 'series': 'Sport', - 'channel': 'deutschlandfunk' - } + 'channel': 'deutschlandfunk', + }, }, { 'info_dict': { 'id': '2ada145f', @@ -119,8 +119,8 @@ class DLFCorpusIE(DLFBaseIE): 'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005', 'uploader': 'Deutschlandfunk', 'series': 'Deutschlandfunk Nova', - 'channel': 'deutschlandfunk-nova' - } + 'channel': 'deutschlandfunk-nova', + }, }, { 'info_dict': { 'id': '5e55e8c9', @@ -130,8 +130,8 @@ class DLFCorpusIE(DLFBaseIE): 'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412', 'uploader': 'Deutschlandfunk', 'series': 'Sport am Samstag', - 'channel': 'deutschlandfunk' - } + 'channel': 'deutschlandfunk', + }, }, { 'info_dict': { 'id': '47e1a096', @@ -141,8 +141,8 @@ class DLFCorpusIE(DLFBaseIE): 'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770', 'uploader': 'Deutschlandfunk', 'series': 'Sport am Samstag', - 'channel': 'deutschlandfunk' - } + 'channel': 'deutschlandfunk', + }, }, { 'info_dict': { 'id': '5e55e8c9', @@ -152,9 +152,9 @@ class DLFCorpusIE(DLFBaseIE): 'thumbnail': 
'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412', 'uploader': 'Deutschlandfunk', 'series': 'Sport am Samstag', - 'channel': 'deutschlandfunk' - } - }] + 'channel': 'deutschlandfunk', + }, + }], }, # Podcast feed with tag buttons, playlist count fluctuates { diff --git a/yt_dlp/extractor/dlive.py b/yt_dlp/extractor/dlive.py index 30fcf9f..157d06c 100644 --- a/yt_dlp/extractor/dlive.py +++ b/yt_dlp/extractor/dlive.py @@ -16,7 +16,7 @@ class DLiveVODIE(InfoExtractor): 'upload_date': '20190701', 'timestamp': 1562011015, 'uploader_id': 'pdp', - } + }, }, { 'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg', 'only_matching': True, @@ -36,7 +36,7 @@ class DLiveVODIE(InfoExtractor): thumbnailUrl viewCount } -}''' % (uploader_id, vod_id)}).encode())['data']['pastBroadcast'] +}''' % (uploader_id, vod_id)}).encode())['data']['pastBroadcast'] # noqa: UP031 title = broadcast['title'] formats = self._extract_m3u8_formats( broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native') @@ -71,12 +71,12 @@ class DLiveStreamIE(InfoExtractor): } username } -}''' % display_name}).encode())['data']['userByDisplayName'] +}''' % display_name}).encode())['data']['userByDisplayName'] # noqa: UP031 livestream = user['livestream'] title = livestream['title'] username = user['username'] formats = self._extract_m3u8_formats( - 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username, + f'https://live.prd.dlive.tv/hls/live/{username}.m3u8', display_name, 'mp4') return { 'id': display_name, diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index 244ffdf..fdf19c2 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -187,7 +187,7 @@ class DouyuTVIE(DouyuBaseIE): } stream_formats = [self._download_json( f'https://www.douyu.com/lapi/live/getH5Play/{room_id}', - video_id, note="Downloading livestream format", + video_id, note='Downloading livestream format', data=urlencode_postdata(form_data))] for rate_id in 
traverse_obj(stream_formats[0], ('data', 'multirates', ..., 'rate')): @@ -208,7 +208,7 @@ class DouyuTVIE(DouyuBaseIE): 'description': ('show_details', {str}), 'uploader': ('nickname', {str}), 'thumbnail': ('room_src', {url_or_none}), - }) + }), } @@ -270,7 +270,7 @@ class DouyuShowIE(DouyuBaseIE): } url_info = self._download_json( 'https://v.douyu.com/api/stream/getStreamUrl', video_id, - data=urlencode_postdata(form_data), note="Downloading video formats") + data=urlencode_postdata(form_data), note='Downloading video formats') formats = [] for name, url in traverse_obj(url_info, ('data', 'thumb_video', {dict.items}, ...)): @@ -284,7 +284,7 @@ class DouyuShowIE(DouyuBaseIE): 'quality': self._QUALITIES.get(name), 'ext': 'mp4' if ext == 'm3u8' else ext, 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https', - **parse_resolution(self._RESOLUTIONS.get(name)) + **parse_resolution(self._RESOLUTIONS.get(name)), }) else: self.to_screen( @@ -302,5 +302,5 @@ class DouyuShowIE(DouyuBaseIE): 'timestamp': ('content', 'create_time', {int_or_none}), 'view_count': ('content', 'view_num', {int_or_none}), 'tags': ('videoTag', ..., 'tagName', {str}), - })) + })), } diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index ddf2128..48eae10 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -70,7 +70,7 @@ class DPlayBaseIE(InfoExtractor): self._initialize_geo_bypass({ 'countries': geo_countries, }) - disco_base = 'https://%s/' % disco_host + disco_base = f'https://{disco_host}/' headers = { 'Referer': url, } @@ -84,7 +84,7 @@ class DPlayBaseIE(InfoExtractor): 'fields[show]': 'name', 'fields[tag]': 'name', 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration', - 'include': 'images,primaryChannel,show,tags' + 'include': 'images,primaryChannel,show,tags', }) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 400: @@ -359,7 +359,7 @@ class DiscoveryPlusBaseIE(DPlayBaseIE): }, 
'videoId': video_id, 'wisteriaProperties': {}, - }).encode('utf-8'))['data']['attributes']['streaming'] + }).encode())['data']['attributes']['streaming'] def _real_extract(self, url): return self._get_disco_api_info(url, self._match_id(url), **self._DISCO_API_PARAMS) @@ -857,7 +857,7 @@ class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE): }, 'params': { 'skip_download': True, - } + }, }] _PRODUCT = 'dplus-india' @@ -870,7 +870,7 @@ class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE): def _update_disco_api_headers(self, headers, disco_base, display_id, realm): headers.update({ - 'x-disco-params': 'realm=%s' % realm, + 'x-disco-params': f'realm={realm}', 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:17.0.0', 'Authorization': self._get_auth(disco_base, display_id, realm), }) @@ -967,14 +967,14 @@ class DiscoveryPlusShowBaseIE(DPlayBaseIE): while page_num < total_pages: season_json = self._download_json( season_url.format(season_id, show_id, str(page_num + 1)), show_name, headers=headers, - note='Downloading season %s JSON metadata%s' % (season_id, ' page %d' % page_num if page_num else '')) + note='Downloading season {} JSON metadata{}'.format(season_id, f' page {page_num}' if page_num else '')) if page_num == 0: total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1 episodes_json = season_json['data'] for episode in episodes_json: video_path = episode['attributes']['path'] yield self.url_result( - '%svideos/%s' % (self._DOMAIN, video_path), + f'{self._DOMAIN}videos/{video_path}', ie=self._VIDEO_IE.ie_key(), video_id=episode.get('id') or video_path) page_num += 1 @@ -1002,7 +1002,7 @@ class DiscoveryPlusItalyIE(DiscoveryPlusBaseIE): def _update_disco_api_headers(self, headers, disco_base, display_id, realm): headers.update({ - 'x-disco-params': 'realm=%s' % realm, + 'x-disco-params': f'realm={realm}', 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6', 'Authorization': self._get_auth(disco_base, display_id, realm), }) diff --git 
a/yt_dlp/extractor/drbonanza.py b/yt_dlp/extractor/drbonanza.py index 824d70d..40dc141 100644 --- a/yt_dlp/extractor/drbonanza.py +++ b/yt_dlp/extractor/drbonanza.py @@ -40,7 +40,7 @@ class DRBonanzaIE(InfoExtractor): def extract(field): return self._search_regex( - r'<div[^>]+>\s*<p>%s:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>' % field, + rf'<div[^>]+>\s*<p>{field}:<p>\s*</div>\s*<div[^>]+>\s*<p>([^<]+)</p>', webpage, field, default=None) info.update({ diff --git a/yt_dlp/extractor/dreisat.py b/yt_dlp/extractor/dreisat.py index 8a59c23..4b0a269 100644 --- a/yt_dlp/extractor/dreisat.py +++ b/yt_dlp/extractor/dreisat.py @@ -25,11 +25,11 @@ class DreiSatIE(ZDFIE): # XXX: Do not subclass from concrete IE 'title': 'Waidmannsheil', 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', 'timestamp': 1410623100, - 'upload_date': '20140913' + 'upload_date': '20140913', }, 'params': { 'skip_download': True, - } + }, }, { # Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html 'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html', diff --git a/yt_dlp/extractor/drooble.py b/yt_dlp/extractor/drooble.py index 106e5c4..16e9a61 100644 --- a/yt_dlp/extractor/drooble.py +++ b/yt_dlp/extractor/drooble.py @@ -24,7 +24,7 @@ class DroobleIE(InfoExtractor): 'timestamp': 1596241390, 'uploader_id': '95894', 'uploader': 'Bluebeat Shelter', - } + }, }, { 'url': 'https://drooble.com/karl340758/videos/2859183', 'info_dict': { @@ -35,7 +35,7 @@ class DroobleIE(InfoExtractor): 'description': 'md5:ffc0bd8ba383db5341a86a6cd7d9bcca', 'upload_date': '20200731', 'uploader': 'Bluebeat Shelter', - } + }, }, { 'url': 'https://drooble.com/karl340758/music/albums/2858031', 'info_dict': { @@ -88,7 +88,7 @@ class DroobleIE(InfoExtractor): entites.append({ '_type': 'url', 'url': url, - 'ie_key': 'Youtube' + 'ie_key': 'Youtube', }) continue is_audio = (media.get('type') or '').lower() == 'audio' diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py index 
0246975..51b40df 100644 --- a/yt_dlp/extractor/dropbox.py +++ b/yt_dlp/extractor/dropbox.py @@ -1,9 +1,9 @@ import base64 import os.path import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, update_url_query, @@ -19,8 +19,8 @@ class DropboxIE(InfoExtractor): 'info_dict': { 'id': 'nelirfsxnmcfbfh', 'ext': 'mp4', - 'title': 'youtube-dl test video \'ä"BaW_jenozKc' - } + 'title': 'youtube-dl test video \'ä"BaW_jenozKc', + }, }, { 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh', 'only_matching': True, @@ -40,7 +40,7 @@ class DropboxIE(InfoExtractor): mobj = self._match_valid_url(url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - fn = compat_urllib_parse_unquote(url_basename(url)) + fn = urllib.parse.unquote(url_basename(url)) title = os.path.splitext(fn)[0] password = self.get_param('videopassword') @@ -51,7 +51,7 @@ class DropboxIE(InfoExtractor): content_id = self._search_regex(r'content_id=(.*?)["\']', webpage, 'content_id') payload = f'is_xhr=true&t={self._get_cookies("https://www.dropbox.com").get("t").value}&content_id={content_id}&password={password}&url={url}' response = self._download_json( - 'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode('UTF-8'), + 'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password', data=payload.encode(), headers={'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'}) if response.get('status') != 'authed': @@ -81,12 +81,12 @@ class DropboxIE(InfoExtractor): 'url': update_url_query(url, {'dl': '1'}), 'format_id': 'original', 'format_note': 'Original', - 'quality': 1 + 'quality': 1, }) return { 'id': video_id, 'title': title, 'formats': formats, - 'subtitles': subtitles + 'subtitles': subtitles, } diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index 80ae6c1..7e97c4d 100644 --- a/yt_dlp/extractor/dropout.py +++ 
b/yt_dlp/extractor/dropout.py @@ -43,9 +43,9 @@ class DropoutIE(InfoExtractor): 'duration': 1180, 'uploader_id': 'user80538407', 'uploader_url': 'https://vimeo.com/user80538407', - 'uploader': 'OTT Videos' + 'uploader': 'OTT Videos', }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], }, { 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1', @@ -66,9 +66,9 @@ class DropoutIE(InfoExtractor): 'duration': 6838, 'uploader_id': 'user80538407', 'uploader_url': 'https://vimeo.com/user80538407', - 'uploader': 'OTT Videos' + 'uploader': 'OTT Videos', }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], }, { 'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special', @@ -85,10 +85,10 @@ class DropoutIE(InfoExtractor): 'duration': 11698, 'uploader_id': 'user80538407', 'uploader_url': 'https://vimeo.com/user80538407', - 'uploader': 'OTT Videos' + 'uploader': 'OTT Videos', }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] - } + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], + }, ] def _get_authenticity_token(self, display_id): @@ -109,7 +109,7 @@ class DropoutIE(InfoExtractor): 'email': username, 'password': password, 'authenticity_token': self._get_authenticity_token(display_id), - 'utf8': True + 'utf8': True, })) user_has_subscription = self._search_regex( @@ -175,8 +175,8 @@ class DropoutSeasonIE(InfoExtractor): 'playlist_count': 24, 'info_dict': { 'id': 'dimension-20-fantasy-high-season-1', - 'title': 'Dimension 20 Fantasy High - Season 1' - } + 'title': 'Dimension 20 Fantasy High - Season 1', + }, }, { 'url': 'https://www.dropout.tv/dimension-20-fantasy-high', @@ -184,8 +184,8 @@ class DropoutSeasonIE(InfoExtractor): 'playlist_count': 24, 'info_dict': { 'id': 
'dimension-20-fantasy-high-season-1', - 'title': 'Dimension 20 Fantasy High - Season 1' - } + 'title': 'Dimension 20 Fantasy High - Season 1', + }, }, { 'url': 'https://www.dropout.tv/dimension-20-shriek-week', @@ -193,8 +193,8 @@ class DropoutSeasonIE(InfoExtractor): 'playlist_count': 4, 'info_dict': { 'id': 'dimension-20-shriek-week-season-1', - 'title': 'Dimension 20 Shriek Week - Season 1' - } + 'title': 'Dimension 20 Shriek Week - Season 1', + }, }, { 'url': 'https://www.dropout.tv/breaking-news-no-laugh-newsroom/season:3', @@ -202,9 +202,9 @@ class DropoutSeasonIE(InfoExtractor): 'playlist_count': 25, 'info_dict': { 'id': 'breaking-news-no-laugh-newsroom-season-3', - 'title': 'Breaking News No Laugh Newsroom - Season 3' - } - } + 'title': 'Breaking News No Laugh Newsroom - Season 3', + }, + }, ] def _fetch_page(self, url, season_id, page): diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py index a9247ed..6a1fe90 100644 --- a/yt_dlp/extractor/drtuber.py +++ b/yt_dlp/extractor/drtuber.py @@ -25,7 +25,7 @@ class DrTuberIE(InfoExtractor): 'categories': ['Babe', 'Blonde', 'Erotic', 'Outdoor', 'Softcore', 'Solo'], 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, - } + }, }, { 'url': 'http://www.drtuber.com/embed/489939', 'only_matching': True, @@ -40,7 +40,7 @@ class DrTuberIE(InfoExtractor): display_id = mobj.group('display_id') or video_id webpage = self._download_webpage( - 'http://www.drtuber.com/video/%s' % video_id, display_id) + f'http://www.drtuber.com/video/{video_id}', display_id) video_data = self._download_json( 'http://www.drtuber.com/player_config_json/', video_id, query={ @@ -56,7 +56,7 @@ class DrTuberIE(InfoExtractor): formats.append({ 'format_id': format_id, 'quality': 2 if format_id == 'hq' else 1, - 'url': video_url + 'url': video_url, }) duration = int_or_none(video_data.get('duration')) or parse_duration( @@ -76,8 +76,8 @@ class DrTuberIE(InfoExtractor): def extract_count(id_, name, default=NO_DEFAULT): return 
str_to_int(self._html_search_regex( - r'<span[^>]+(?:class|id)="%s"[^>]*>([\d,\.]+)</span>' % id_, - webpage, '%s count' % name, default=default, fatal=False)) + rf'<span[^>]+(?:class|id)="{id_}"[^>]*>([\d,\.]+)</span>', + webpage, f'{name} count', default=default, fatal=False)) like_count = extract_count('rate_likes', 'like') dislike_count = extract_count('rate_dislikes', 'dislike', default=None) diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py index 2a6e337..32b6845 100644 --- a/yt_dlp/extractor/drtv.py +++ b/yt_dlp/extractor/drtv.py @@ -220,7 +220,7 @@ class DRTVIE(InfoExtractor): lang = sub_track.get('language') or 'da' subtitles.setdefault(self.SUBTITLE_LANGS.get(lang, lang) + subtitle_suffix, []).append({ 'url': sub_track['link'], - 'ext': mimetype2ext(sub_track.get('format')) or 'vtt' + 'ext': mimetype2ext(sub_track.get('format')) or 'vtt', }) if not formats and traverse_obj(item, ('season', 'customFields', 'IsGeoRestricted')): @@ -284,14 +284,14 @@ class DRTVLiveIE(InfoExtractor): if not stream_path: continue stream_url = update_url_query( - '%s/%s' % (server, stream_path), {'b': ''}) + f'{server}/{stream_path}', {'b': ''}) if link_type == 'HLS': formats.extend(self._extract_m3u8_formats( stream_url, channel_id, 'mp4', m3u8_id=link_type, fatal=False, live=True)) elif link_type == 'HDS': formats.extend(self._extract_f4m_formats(update_url_query( - '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}), + f'{server}/{stream_path}', {'hdcore': '3.7.0'}), channel_id, f4m_id=link_type, fatal=False)) return { @@ -317,7 +317,7 @@ class DRTVSeasonIE(InfoExtractor): 'season_number': 2008, 'alt_title': 'Season 2008', }, - 'playlist_mincount': 8 + 'playlist_mincount': 8, }, { 'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_8761', 'info_dict': { @@ -328,7 +328,7 @@ class DRTVSeasonIE(InfoExtractor): 'season_number': 2009, 'alt_title': 'Season 2009', }, - 'playlist_mincount': 19 + 'playlist_mincount': 19, }] def _real_extract(self, url): 
@@ -356,7 +356,7 @@ class DRTVSeasonIE(InfoExtractor): 'alt_title': traverse_obj(data, ('entries', 0, 'item', 'contextualTitle')), 'series': traverse_obj(data, ('entries', 0, 'item', 'title')), 'entries': entries, - 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')) + 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')), } @@ -373,7 +373,7 @@ class DRTVSeriesIE(InfoExtractor): 'series': 'Frank & Kastaniegaarden', 'alt_title': '', }, - 'playlist_mincount': 15 + 'playlist_mincount': 15, }] def _real_extract(self, url): @@ -387,7 +387,7 @@ class DRTVSeriesIE(InfoExtractor): 'title': season.get('title'), 'alt_title': season.get('contextualTitle'), 'series': traverse_obj(data, ('entries', 0, 'item', 'title')), - 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')) + 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')), } for season in traverse_obj(data, ('entries', 0, 'item', 'show', 'seasons', 'items'))] return { @@ -397,5 +397,5 @@ class DRTVSeriesIE(InfoExtractor): 'title': traverse_obj(data, ('entries', 0, 'item', 'title')), 'alt_title': traverse_obj(data, ('entries', 0, 'item', 'contextualTitle')), 'series': traverse_obj(data, ('entries', 0, 'item', 'title')), - 'entries': entries + 'entries': entries, } diff --git a/yt_dlp/extractor/dtube.py b/yt_dlp/extractor/dtube.py index 5ea014c..0d87820 100644 --- a/yt_dlp/extractor/dtube.py +++ b/yt_dlp/extractor/dtube.py @@ -49,16 +49,16 @@ class DTubeIE(InfoExtractor): formats = [] for q in ('240', '480', '720', '1080', ''): - video_url = canonical_url(content.get('video%shash' % q)) + video_url = canonical_url(content.get(f'video{q}hash')) if not video_url: continue format_id = (q + 'p') if q else 'Source' try: - self.to_screen('%s: Checking %s video format URL' % (video_id, format_id)) + self.to_screen(f'{video_id}: Checking {format_id} video format URL') self._downloader._opener.open(video_url, timeout=5).close() except 
socket.timeout: self.to_screen( - '%s: %s URL is invalid, skipping' % (video_id, format_id)) + f'{video_id}: {format_id} URL is invalid, skipping') continue formats.append({ 'format_id': format_id, diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py index adc7705..68c3f05 100644 --- a/yt_dlp/extractor/duboku.py +++ b/yt_dlp/extractor/duboku.py @@ -3,7 +3,6 @@ import re import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( ExtractorError, clean_html, @@ -24,23 +23,23 @@ def _get_elements_by_tag_and_attrib(html, tag=None, attribute=None, value=None, if attribute is None: attribute = '' else: - attribute = r'\s+(?P<attribute>%s)' % re.escape(attribute) + attribute = rf'\s+(?P<attribute>{re.escape(attribute)})' if value is None: value = '' else: value = re.escape(value) if escape_value else value - value = '=[\'"]?(?P<value>%s)[\'"]?' % value + value = f'=[\'"]?(?P<value>{value})[\'"]?' retlist = [] - for m in re.finditer(r'''(?xs) - <(?P<tag>%s) + for m in re.finditer(rf'''(?xs) + <(?P<tag>{tag}) (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? - %s%s + {attribute}{value} (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? \s*> (?P<content>.*?) 
</\1> - ''' % (tag, attribute, value), html): + ''', html): retlist.append(m) return retlist @@ -101,7 +100,7 @@ class DubokuIE(InfoExtractor): season_id = temp[1] episode_id = temp[2] - webpage_url = 'https://w.duboku.io/vodplay/%s.html' % video_id + webpage_url = f'https://w.duboku.io/vodplay/{video_id}.html' webpage_html = self._download_webpage(webpage_url, video_id) # extract video url @@ -165,7 +164,7 @@ class DubokuIE(InfoExtractor): 'episode_number': int_or_none(episode_id), 'episode_id': episode_id, 'formats': formats, - 'http_headers': headers + 'http_headers': headers, } @@ -193,11 +192,11 @@ class DubokuPlaylistIE(InfoExtractor): def _real_extract(self, url): mobj = self._match_valid_url(url) if mobj is None: - raise ExtractorError('Invalid URL: %s' % url) + raise ExtractorError(f'Invalid URL: {url}') series_id = mobj.group('id') - fragment = compat_urlparse.urlparse(url).fragment + fragment = urllib.parse.urlparse(url).fragment - webpage_url = 'https://w.duboku.io/voddetail/%s.html' % series_id + webpage_url = f'https://w.duboku.io/voddetail/{series_id}.html' webpage_html = self._download_webpage(webpage_url, series_id) # extract title @@ -221,7 +220,7 @@ class DubokuPlaylistIE(InfoExtractor): div.group('content'), 'a', 'href', value='[^\'"]+?', escape_value=False): playlist.append({ 'href': unescapeHTML(a.group('value')), - 'title': unescapeHTML(a.group('content')) + 'title': unescapeHTML(a.group('content')), }) playlists[playlist_id] = playlist @@ -237,11 +236,11 @@ class DubokuPlaylistIE(InfoExtractor): (playlist_id, playlist) = first if not playlist: raise ExtractorError( - 'Cannot find %s' % fragment if fragment else 'Cannot extract playlist') + f'Cannot find {fragment}' if fragment else 'Cannot extract playlist') # return url results return self.playlist_result([ self.url_result( - compat_urlparse.urljoin('https://w.duboku.io', x['href']), + urllib.parse.urljoin('https://w.duboku.io', x['href']), ie=DubokuIE.ie_key(), video_title=x.get('title')) 
for x in playlist], series_id + '#' + playlist_id, title) diff --git a/yt_dlp/extractor/dumpert.py b/yt_dlp/extractor/dumpert.py index 5e7aef0..1c2050c 100644 --- a/yt_dlp/extractor/dumpert.py +++ b/yt_dlp/extractor/dumpert.py @@ -24,7 +24,7 @@ class DumpertIE(InfoExtractor): 'duration': 9, 'view_count': int, 'like_count': int, - } + }, }, { 'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7', 'only_matching': True, @@ -46,7 +46,7 @@ class DumpertIE(InfoExtractor): 'view_count': int, 'like_count': int, }, - 'params': {'skip_download': 'm3u8'} + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.dumpert.nl/toppers?selectedId=100031688_b317a185', 'only_matching': True, diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py index e6660dc..3e442b3 100644 --- a/yt_dlp/extractor/dvtv.py +++ b/yt_dlp/extractor/dvtv.py @@ -28,7 +28,7 @@ class DVTVIE(InfoExtractor): 'duration': 1484, 'upload_date': '20141217', 'timestamp': 1418792400, - } + }, }, { 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', 'info_dict': { @@ -44,7 +44,7 @@ class DVTVIE(InfoExtractor): 'description': 'md5:0916925dea8e30fe84222582280b47a0', 'timestamp': 1418760010, 'upload_date': '20141216', - } + }, }, { 'md5': '5f7652a08b05009c1292317b449ffea2', 'info_dict': { @@ -54,7 +54,7 @@ class DVTVIE(InfoExtractor): 'description': 'md5:ff2f9f6de73c73d7cef4f756c1c1af42', 'timestamp': 1418760010, 'upload_date': '20141216', - } + }, }, { 'md5': '498eb9dfa97169f409126c617e2a3d64', 'info_dict': { @@ -64,7 +64,7 @@ class DVTVIE(InfoExtractor): 'description': 'md5:889fe610a70fee5511dc3326a089188e', 'timestamp': 1418760010, 'upload_date': '20141216', - } + }, }, { 'md5': 'b8dc6b744844032dab6ba3781a7274b9', 'info_dict': { @@ -74,7 +74,7 @@ class DVTVIE(InfoExtractor): 'description': 'md5:544f86de6d20c4815bea11bf2ac3004f', 'timestamp': 1418760010, 'upload_date': '20141216', - } + }, }], }, { 'url': 
'https://video.aktualne.cz/dvtv/zeman-si-jen-leci-mindraky-sobotku-nenavidi-a-babis-se-mu-te/r~960cdb3a365a11e7a83b0025900fea04/', @@ -150,7 +150,7 @@ class DVTVIE(InfoExtractor): 'thumbnail': data.get('image'), 'duration': int_or_none(data.get('duration')), 'timestamp': int_or_none(timestamp), - 'formats': formats + 'formats': formats, } def _real_extract(self, url): @@ -170,7 +170,7 @@ class DVTVIE(InfoExtractor): webpage, 'video', default=None) if item: # remove function calls (ex. htmldeentitize) - # TODO this should be fixed in a general way in the js_to_json + # TODO: this should be fixed in a general way in the js_to_json item = re.sub(r'\w+?\((.+)\)', r'\1', item) return self._parse_video_metadata(item, video_id, timestamp) diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py index feab804..320e29b 100644 --- a/yt_dlp/extractor/dw.py +++ b/yt_dlp/extractor/dw.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( int_or_none, unified_strdate, @@ -22,7 +23,7 @@ class DWIE(InfoExtractor): 'title': 'Intelligent light', 'description': 'md5:90e00d5881719f2a6a5827cb74985af1', 'upload_date': '20160605', - } + }, }, { # audio 'url': 'http://www.dw.com/en/worldlink-my-business/av-19111941', @@ -33,7 +34,7 @@ class DWIE(InfoExtractor): 'title': 'WorldLink: My business', 'description': 'md5:bc9ca6e4e063361e21c920c53af12405', 'upload_date': '20160311', - } + }, }, { # DW documentaries, only last for one or two weeks 'url': 'http://www.dw.com/en/documentaries-welcome-to-the-90s-2016-05-21/e-19220158-9798', @@ -60,7 +61,7 @@ class DWIE(InfoExtractor): formats = [{'url': hidden_inputs['file_name']}] else: formats = self._extract_smil_formats( - 'http://www.dw.com/smil/v-%s' % media_id, media_id, + f'http://www.dw.com/smil/v-{media_id}', media_id, transform_source=lambda s: s.replace( 'rtmp://tv-od.dw.de/flash/', 'http://tv-download.dw.de/dwtv_video/flv/')) @@ -97,7 +98,7 @@ class 
DWArticleIE(InfoExtractor): 'title': 'The harsh life of refugees in Idomeni', 'description': 'md5:196015cc7e48ebf474db9399420043c7', 'upload_date': '20160310', - } + }, } def _real_extract(self, url): @@ -105,6 +106,6 @@ class DWArticleIE(InfoExtractor): webpage = self._download_webpage(url, article_id) hidden_inputs = self._hidden_inputs(webpage) media_id = hidden_inputs['media_id'] - media_path = self._search_regex(r'href="([^"]+av-%s)"\s+class="overlayLink"' % media_id, webpage, 'media url') - media_url = compat_urlparse.urljoin(url, media_path) + media_path = self._search_regex(rf'href="([^"]+av-{media_id})"\s+class="overlayLink"', webpage, 'media url') + media_url = urllib.parse.urljoin(url, media_path) return self.url_result(media_url, 'DW', media_id) diff --git a/yt_dlp/extractor/eagleplatform.py b/yt_dlp/extractor/eagleplatform.py index 739d179..685f8c0 100644 --- a/yt_dlp/extractor/eagleplatform.py +++ b/yt_dlp/extractor/eagleplatform.py @@ -70,14 +70,14 @@ class EaglePlatformIE(InfoExtractor): ''' # "Basic usage" embedding (see http://dultonmedia.github.io/eplayer/) mobj = re.search( - r'''(?xs) - %s + rf'''(?xs) + {PLAYER_JS_RE} <div[^>]+ class=(?P<qclass>["\'])eagleplayer(?P=qclass)[^>]+ data-id=["\'](?P<id>\d+) - ''' % PLAYER_JS_RE, webpage) + ''', webpage) if mobj is not None: - return [add_referer('eagleplatform:%(host)s:%(id)s' % mobj.groupdict())] + return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] # Generalization of "Javascript code usage", "Combined usage" and # "Usage without attaching to DOM" embeddings (see # http://dultonmedia.github.io/eplayer/) @@ -96,9 +96,9 @@ class EaglePlatformIE(InfoExtractor): \s*\) .+? 
</script> - ''' % PLAYER_JS_RE, webpage) + ''' % PLAYER_JS_RE, webpage) # noqa: UP031 if mobj is not None: - return [add_referer('eagleplatform:%(host)s:%(id)s' % mobj.groupdict())] + return [add_referer('eagleplatform:{host}:{id}'.format(**mobj.groupdict()))] @staticmethod def _handle_error(response): @@ -108,7 +108,7 @@ class EaglePlatformIE(InfoExtractor): def _download_json(self, url_or_request, video_id, *args, **kwargs): try: - response = super(EaglePlatformIE, self)._download_json( + response = super()._download_json( url_or_request, video_id, *args, **kwargs) except ExtractorError as ee: if isinstance(ee.cause, HTTPError): @@ -137,7 +137,7 @@ class EaglePlatformIE(InfoExtractor): query['referrer'] = referrer player_data = self._download_json( - 'http://%s/api/player_data' % host, video_id, + f'http://{host}/api/player_data', video_id, headers=headers, query=query) media = player_data['data']['playlist']['viewports'][0]['medialist'][0] @@ -186,7 +186,7 @@ class EaglePlatformIE(InfoExtractor): }) else: f = { - 'format_id': 'http-%s' % format_id, + 'format_id': f'http-{format_id}', 'height': int_or_none(format_id), } f['url'] = format_url @@ -212,4 +212,4 @@ class ClipYouEmbedIE(InfoExtractor): mobj = re.search( r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage) if mobj is not None: - yield smuggle_url('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), {'referrer': url}) + yield smuggle_url('eagleplatform:{host}:{id}'.format(**mobj.groupdict()), {'referrer': url}) diff --git a/yt_dlp/extractor/ebaumsworld.py b/yt_dlp/extractor/ebaumsworld.py index 0854d03..ac766b3 100644 --- a/yt_dlp/extractor/ebaumsworld.py +++ b/yt_dlp/extractor/ebaumsworld.py @@ -18,7 +18,7 @@ class EbaumsWorldIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) config = self._download_xml( - 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id) + 
f'http://www.ebaumsworld.com/video/player/{video_id}', video_id) video_url = config.find('file').text return { diff --git a/yt_dlp/extractor/ebay.py b/yt_dlp/extractor/ebay.py index d0eb9fc..f1d122f 100644 --- a/yt_dlp/extractor/ebay.py +++ b/yt_dlp/extractor/ebay.py @@ -11,7 +11,7 @@ class EbayIE(InfoExtractor): 'ext': 'mp4', 'title': 'WiFi internal antenna adhesive for wifi 2.4GHz wifi 5 wifi 6 wifi 6E full bands', }, - 'params': {'skip_download': 'm3u8'} + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): @@ -32,5 +32,5 @@ class EbayIE(InfoExtractor): return { 'id': video_id, 'title': remove_end(self._html_extract_title(webpage), ' | eBay'), - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/egghead.py b/yt_dlp/extractor/egghead.py index c94f3f8..62d2e54 100644 --- a/yt_dlp/extractor/egghead.py +++ b/yt_dlp/extractor/egghead.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_ext, int_or_none, @@ -13,7 +12,7 @@ class EggheadBaseIE(InfoExtractor): def _call_api(self, path, video_id, resource, fatal=True): return self._download_json( 'https://app.egghead.io/api/v1/' + path, - video_id, 'Downloading %s JSON' % resource, fatal=fatal) + video_id, f'Downloading {resource} JSON', fatal=fatal) class EggheadCourseIE(EggheadBaseIE): @@ -46,7 +45,7 @@ class EggheadCourseIE(EggheadBaseIE): continue lesson_id = lesson.get('id') if lesson_id: - lesson_id = compat_str(lesson_id) + lesson_id = str(lesson_id) entries.append(self.url_result( lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id)) @@ -55,7 +54,7 @@ class EggheadCourseIE(EggheadBaseIE): playlist_id = course.get('id') if playlist_id: - playlist_id = compat_str(playlist_id) + playlist_id = str(playlist_id) return self.playlist_result( entries, playlist_id, course.get('title'), @@ -98,7 +97,7 @@ class EggheadLessonIE(EggheadBaseIE): lesson = self._call_api( 'lessons/' + display_id, display_id, 'lesson') 
- lesson_id = compat_str(lesson['id']) + lesson_id = str(lesson['id']) title = lesson['title'] formats = [] @@ -129,6 +128,6 @@ class EggheadLessonIE(EggheadBaseIE): 'view_count': int_or_none(lesson.get('plays_count')), 'tags': try_get(lesson, lambda x: x['tag_list'], list), 'series': try_get( - lesson, lambda x: x['series']['title'], compat_str), + lesson, lambda x: x['series']['title'], str), 'formats': formats, } diff --git a/yt_dlp/extractor/eighttracks.py b/yt_dlp/extractor/eighttracks.py index 3dd9ab1..3ac4c56 100644 --- a/yt_dlp/extractor/eighttracks.py +++ b/yt_dlp/extractor/eighttracks.py @@ -2,9 +2,6 @@ import json import random from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( ExtractorError, ) @@ -29,8 +26,8 @@ class EightTracksIE(InfoExtractor): 'id': '11885610', 'ext': 'm4a', 'title': "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } + 'uploader_id': 'ytdl', + }, }, { 'md5': '4ab26f05c1f7291ea460a3920be8021f', @@ -38,8 +35,8 @@ class EightTracksIE(InfoExtractor): 'id': '11885608', 'ext': 'm4a', 'title': "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } + 'uploader_id': 'ytdl', + }, }, { 'md5': 'd30b5b5f74217410f4689605c35d1fd7', @@ -47,8 +44,8 @@ class EightTracksIE(InfoExtractor): 'id': '11885679', 'ext': 'm4a', 'title': "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } + 'uploader_id': 'ytdl', + }, }, { 'md5': '4eb0a669317cd725f6bbd336a29f923a', @@ -56,8 +53,8 @@ class EightTracksIE(InfoExtractor): 'id': '11885680', 'ext': 'm4a', 'title': "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } + 'uploader_id': 'ytdl', + }, }, { 'md5': '1893e872e263a2705558d1d319ad19e8', @@ -65,8 +62,8 @@ class EightTracksIE(InfoExtractor): 'id': '11885682', 'ext': 'm4a', 'title': "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", 
- 'uploader_id': 'ytdl' - } + 'uploader_id': 'ytdl', + }, }, { 'md5': 'b673c46f47a216ab1741ae8836af5899', @@ -74,8 +71,8 @@ class EightTracksIE(InfoExtractor): 'id': '11885683', 'ext': 'm4a', 'title': "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } + 'uploader_id': 'ytdl', + }, }, { 'md5': '1d74534e95df54986da7f5abf7d842b7', @@ -83,8 +80,8 @@ class EightTracksIE(InfoExtractor): 'id': '11885684', 'ext': 'm4a', 'title': "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } + 'uploader_id': 'ytdl', + }, }, { 'md5': 'f081f47af8f6ae782ed131d38b9cd1c0', @@ -92,10 +89,10 @@ class EightTracksIE(InfoExtractor): 'id': '11885685', 'ext': 'm4a', 'title': "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", - 'uploader_id': 'ytdl' - } - } - ] + 'uploader_id': 'ytdl', + }, + }, + ], } def _real_extract(self, url): @@ -105,7 +102,7 @@ class EightTracksIE(InfoExtractor): data = self._parse_json( self._search_regex( - r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'), + r'(?s)PAGE\.mix\s*=\s*({.+?});\n', webpage, 'trax information'), playlist_id) session = str(random.randint(0, 1000000000)) @@ -116,7 +113,7 @@ class EightTracksIE(InfoExtractor): # duration is sometimes negative, use predefined avg duration if avg_song_duration <= 0: avg_song_duration = 300 - first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id) + first_url = f'http://8tracks.com/sets/{session}/play?player=sm&mix_id={mix_id}&format=jsonh' next_url = first_url entries = [] @@ -140,7 +137,7 @@ class EightTracksIE(InfoExtractor): api_data = json.loads(api_json) track_data = api_data['set']['track'] info = { - 'id': compat_str(track_data['id']), + 'id': str(track_data['id']), 'url': track_data['track_file_stream_url'], 'title': track_data['performer'] + ' - ' + track_data['name'], 'raw_title': track_data['name'], @@ -149,12 +146,12 @@ class EightTracksIE(InfoExtractor): } entries.append(info) - 
next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % ( + next_url = 'http://8tracks.com/sets/{}/next?player=sm&mix_id={}&format=jsonh&track_id={}'.format( session, mix_id, track_data['id']) return { '_type': 'playlist', 'entries': entries, - 'id': compat_str(mix_id), + 'id': str(mix_id), 'display_id': playlist_id, 'title': data.get('name'), 'description': data.get('description'), diff --git a/yt_dlp/extractor/eitb.py b/yt_dlp/extractor/eitb.py index 66afbb6..18b802e 100644 --- a/yt_dlp/extractor/eitb.py +++ b/yt_dlp/extractor/eitb.py @@ -1,6 +1,11 @@ from .common import InfoExtractor from ..networking import Request -from ..utils import float_or_none, int_or_none, parse_iso8601 +from ..utils import ( + float_or_none, + int_or_none, + join_nonempty, + parse_iso8601, +) class EitbIE(InfoExtractor): @@ -26,7 +31,7 @@ class EitbIE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/%s/' % video_id, + f'http://mam.eitb.eus/mam/REST/ServiceMultiweb/Video/MULTIWEBTV/{video_id}/', video_id, 'Downloading video JSON') media = video['web_media'][0] @@ -37,12 +42,9 @@ class EitbIE(InfoExtractor): if not video_url: continue tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000) - format_id = 'http' - if tbr: - format_id += '-%d' % int(tbr) formats.append({ 'url': rendition['PMD_URL'], - 'format_id': format_id, + 'format_id': join_nonempty('http', int_or_none(tbr)), 'width': int_or_none(rendition.get('FRAME_WIDTH')), 'height': int_or_none(rendition.get('FRAME_HEIGHT')), 'tbr': tbr, @@ -59,12 +61,12 @@ class EitbIE(InfoExtractor): token = token_data.get('token') if token: formats.extend(self._extract_m3u8_formats( - '%s?hdnts=%s' % (hls_url, token), video_id, m3u8_id='hls', fatal=False)) + f'{hls_url}?hdnts={token}', video_id, m3u8_id='hls', fatal=False)) hds_url = media.get('HDS_SURL') if hds_url: formats.extend(self._extract_f4m_formats( - 
'%s?hdcore=3.7.0' % hds_url.replace('euskalsvod', 'euskalvod'), + '{}?hdcore=3.7.0'.format(hds_url.replace('euskalsvod', 'euskalvod')), video_id, f4m_id='hds', fatal=False)) return { diff --git a/yt_dlp/extractor/elpais.py b/yt_dlp/extractor/elpais.py index 7c6c880..46fa29f 100644 --- a/yt_dlp/extractor/elpais.py +++ b/yt_dlp/extractor/elpais.py @@ -15,7 +15,7 @@ class ElPaisIE(InfoExtractor): 'title': 'Tiempo nuevo, recetas viejas', 'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.', 'upload_date': '20140206', - } + }, }, { 'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t', 'md5': '3bd5b09509f3519d7d9e763179b013de', @@ -25,7 +25,7 @@ class ElPaisIE(InfoExtractor): 'title': 'Cómo hacer el mejor café con cafetera italiana', 'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. 
No tienes más que ver este vídeo y seguir sus siete normas básicas.', 'upload_date': '20160303', - } + }, }, { 'url': 'http://elpais.com/elpais/2017/01/26/ciencia/1485456786_417876.html', 'md5': '9c79923a118a067e1a45789e1e0b0f9c', diff --git a/yt_dlp/extractor/eltrecetv.py b/yt_dlp/extractor/eltrecetv.py index f64023a..71cf921 100644 --- a/yt_dlp/extractor/eltrecetv.py +++ b/yt_dlp/extractor/eltrecetv.py @@ -13,7 +13,7 @@ class ElTreceTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'AHORA CAIGO - Programa 06/10/23', 'thumbnail': 'https://thumbs.vodgc.net/AHCA05102023145553329621094.JPG?649339', - } + }, }, { 'url': 'https://www.eltrecetv.com.ar/poco-correctos/capitulos/temporada-2023/programa-del-250923-invitada-dalia-gutmann/', @@ -34,7 +34,7 @@ class ElTreceTVIE(InfoExtractor): { 'url': 'https://www.eltrecetv.com.ar/el-galpon/capitulos/temporada-2023/programa-del-160923-invitado-raul-lavie/', 'only_matching': True, - } + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py index 3bfcc54..696f3e1 100644 --- a/yt_dlp/extractor/epicon.py +++ b/yt_dlp/extractor/epicon.py @@ -14,7 +14,7 @@ class EpiconIE(InfoExtractor): 'title': 'Air Battle of Srinagar', 'description': 'md5:c4de2013af9bc05ae4392e4115d518d7', 'thumbnail': r're:^https?://.*\.jpg$', - } + }, }, { 'url': 'https://www.epicon.in/movies/krit', 'info_dict': { @@ -23,7 +23,7 @@ class EpiconIE(InfoExtractor): 'title': 'Krit', 'description': 'md5:c12b35dad915d48ccff7f013c79bab4a', 'thumbnail': r're:^https?://.*\.jpg$', - } + }, }, { 'url': 'https://www.epicon.in/tv-shows/paapnaashini-ganga/season-1/vardaan', 'info_dict': { @@ -32,7 +32,7 @@ class EpiconIE(InfoExtractor): 'title': 'Paapnaashini Ganga - Season 1 - Ep 1 - VARDAAN', 'description': 'md5:f517058c3d0402398eefa6242f4dd6ae', 'thumbnail': r're:^https?://.*\.jpg$', - } + }, }, { 'url': 'https://www.epicon.in/movies/jayadev', 'info_dict': { @@ -41,16 +41,17 @@ class EpiconIE(InfoExtractor): 'title': 'Jayadev', 
'description': 'md5:09e349eecd8e585a3b6466904f19df6c', 'thumbnail': r're:^https?://.*\.jpg$', - } + }, }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) cid = self._search_regex(r'class=\"mylist-icon\ iconclick\"\ id=\"(\d+)', webpage, 'cid') headers = {'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'} data = f'cid={cid}&action=st&type=video'.encode() - data_json = self._parse_json(self._download_json('https://www.epicon.in/ajaxplayer/', id, headers=headers, data=data), id) + data_json = self._parse_json( + self._download_json('https://www.epicon.in/ajaxplayer/', video_id, headers=headers, data=data), video_id) if not data_json['success']: raise ExtractorError(data_json['message'], expected=True) @@ -58,7 +59,7 @@ class EpiconIE(InfoExtractor): title = self._search_regex(r'setplaytitle=\"([^\"]+)', webpage, 'title') description = self._og_search_description(webpage) or None thumbnail = self._og_search_thumbnail(webpage) or None - formats = self._extract_m3u8_formats(data_json['url']['video_url'], id) + formats = self._extract_m3u8_formats(data_json['url']['video_url'], video_id) subtitles = {} for subtitle in data_json.get('subtitles', []): @@ -70,7 +71,7 @@ class EpiconIE(InfoExtractor): }) return { - 'id': id, + 'id': video_id, 'formats': formats, 'title': title, 'description': description, @@ -108,8 +109,8 @@ class EpiconSeriesIE(InfoExtractor): }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) - episodes = re.findall(r'ct-tray-url=\"(tv-shows/%s/[^\"]+)' % id, webpage) - entries = [self.url_result('https://www.epicon.in/%s' % episode, ie=EpiconIE.ie_key()) for episode in episodes] - return self.playlist_result(entries, playlist_id=id) + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + episodes = 
re.findall(rf'ct-tray-url=\"(tv-shows/{playlist_id}/[^\"]+)', webpage) + entries = [self.url_result(f'https://www.epicon.in/{episode}', EpiconIE) for episode in episodes] + return self.playlist_result(entries, playlist_id=playlist_id) diff --git a/yt_dlp/extractor/epoch.py b/yt_dlp/extractor/epoch.py index 110e78c..9742f76 100644 --- a/yt_dlp/extractor/epoch.py +++ b/yt_dlp/extractor/epoch.py @@ -11,7 +11,7 @@ class EpochIE(InfoExtractor): 'id': 'a3dd732c-4750-4bc8-8156-69180668bda1', 'ext': 'mp4', 'title': '‘They Can Do Audio, Video, Physical Surveillance on You 24H/365D a Year’: Rex Lee on Intrusive Apps', - } + }, }, { 'url': 'https://www.theepochtimes.com/the-communist-partys-cyberattacks-on-america-explained-rex-lee-talks-tech-hybrid-warfare_4342413.html', @@ -19,7 +19,7 @@ class EpochIE(InfoExtractor): 'id': '276c7f46-3bbf-475d-9934-b9bbe827cf0a', 'ext': 'mp4', 'title': 'The Communist Party’s Cyberattacks on America Explained; Rex Lee Talks Tech Hybrid Warfare', - } + }, }, { 'url': 'https://www.theepochtimes.com/kash-patel-a-6-year-saga-of-government-corruption-from-russiagate-to-mar-a-lago_4690250.html', @@ -27,7 +27,7 @@ class EpochIE(InfoExtractor): 'id': 'aa9ceecd-a127-453d-a2de-7153d6fd69b6', 'ext': 'mp4', 'title': 'Kash Patel: A ‘6-Year-Saga’ of Government Corruption, From Russiagate to Mar-a-Lago', - } + }, }, { 'url': 'https://www.theepochtimes.com/dick-morris-discusses-his-book-the-return-trumps-big-2024-comeback_4819205.html', @@ -35,7 +35,7 @@ class EpochIE(InfoExtractor): 'id': '9489f994-2a20-4812-b233-ac0e5c345632', 'ext': 'mp4', 'title': 'Dick Morris Discusses His Book ‘The Return: Trump’s Big 2024 Comeback’', - } + }, }, ] @@ -51,5 +51,5 @@ class EpochIE(InfoExtractor): 'id': youmaker_video_id, 'formats': formats, 'subtitles': subtitles, - 'title': self._html_extract_title(webpage) + 'title': self._html_extract_title(webpage), } diff --git a/yt_dlp/extractor/eporner.py b/yt_dlp/extractor/eporner.py index b18a76c..2d25c6b 100644 --- 
a/yt_dlp/extractor/eporner.py +++ b/yt_dlp/extractor/eporner.py @@ -29,9 +29,6 @@ class EpornerIE(InfoExtractor): 'view_count': int, 'age_limit': 18, }, - 'params': { - 'proxy': '127.0.0.1:8118' - } }, { # New (May 2016) URL layout 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', @@ -56,7 +53,7 @@ class EpornerIE(InfoExtractor): video_id = self._match_id(urlh.url) - hash = self._search_regex( + vid_hash = self._search_regex( r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash') title = self._og_search_title(webpage, default=None) or self._html_search_regex( @@ -64,13 +61,13 @@ class EpornerIE(InfoExtractor): # Reverse engineered from vjs.js def calc_hash(s): - return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8))) + return ''.join(encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)) video = self._download_json( - 'http://www.eporner.com/xhr/video/%s' % video_id, + f'http://www.eporner.com/xhr/video/{video_id}', display_id, note='Downloading video JSON', query={ - 'hash': calc_hash(hash), + 'hash': calc_hash(vid_hash), 'device': 'generic', 'domain': 'www.eporner.com', 'fallback': 'false', @@ -78,7 +75,7 @@ class EpornerIE(InfoExtractor): if video.get('available') is False: raise ExtractorError( - '%s said: %s' % (self.IE_NAME, video['message']), expected=True) + '{} said: {}'.format(self.IE_NAME, video['message']), expected=True) sources = video['sources'] diff --git a/yt_dlp/extractor/erocast.py b/yt_dlp/extractor/erocast.py index 92a5753..bd22137 100644 --- a/yt_dlp/extractor/erocast.py +++ b/yt_dlp/extractor/erocast.py @@ -32,7 +32,7 @@ class ErocastIE(InfoExtractor): 'view_count': int, 'comment_count': int, 'webpage_url': 'https://erocast.me/track/9787/f4m-your-roommate-who-is-definitely-not-possessed-by-an-alien-suddenly-wants-to-fuck-you', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/eroprofile.py b/yt_dlp/extractor/eroprofile.py index 2b61f3b..2067217 100644 --- 
a/yt_dlp/extractor/eroprofile.py +++ b/yt_dlp/extractor/eroprofile.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode from ..utils import ( ExtractorError, merge_dicts, @@ -38,7 +38,7 @@ class EroProfileIE(InfoExtractor): }] def _perform_login(self, username, password): - query = compat_urllib_parse_urlencode({ + query = urllib.parse.urlencode({ 'username': username, 'password': password, 'url': 'http://www.eroprofile.com/', @@ -91,7 +91,7 @@ class EroProfileAlbumIE(InfoExtractor): 'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893', 'info_dict': { 'id': 'BBW-2-893', - 'title': 'BBW 2' + 'title': 'BBW 2', }, 'playlist_mincount': 486, }, diff --git a/yt_dlp/extractor/err.py b/yt_dlp/extractor/err.py index abd00f2..7896cdb 100644 --- a/yt_dlp/extractor/err.py +++ b/yt_dlp/extractor/err.py @@ -145,7 +145,7 @@ class ERRJupiterIE(InfoExtractor): 'season_number': 0, 'series': 'Лесные истории | Аисты', 'series_id': '1037497', - } + }, }, { 'note': 'Lasteekraan: Pätu', 'url': 'https://lasteekraan.err.ee/1092243/patu', diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py index 19c6933..864aa6d 100644 --- a/yt_dlp/extractor/ertgr.py +++ b/yt_dlp/extractor/ertgr.py @@ -2,7 +2,6 @@ import json import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, clean_html, @@ -30,19 +29,19 @@ class ERTFlixBaseIE(InfoExtractor): headers = headers or {} if data: headers['Content-Type'] = headers_as_param['Content-Type'] = 'application/json;charset=utf-8' - data = json.dumps(merge_dicts(platform_codename, data)).encode('utf-8') + data = json.dumps(merge_dicts(platform_codename, data)).encode() query = merge_dicts( {} if data else platform_codename, {'$headers': json.dumps(headers_as_param)}, params) response = self._download_json( - 'https://api.app.ertflix.gr/v%s/%s' % (str(api_version), method), + 
f'https://api.app.ertflix.gr/v{api_version!s}/{method}', video_id, fatal=False, query=query, data=data, headers=headers) if try_get(response, lambda x: x['Result']['Success']) is True: return response def _call_api_get_tiles(self, video_id, *tile_ids): - requested_tile_ids = [video_id] + list(tile_ids) + requested_tile_ids = [video_id, *tile_ids] requested_tiles = [{'Id': tile_id} for tile_id in requested_tile_ids] tiles_response = self._call_api( video_id, method='Tile/GetTiles', api_version=2, @@ -174,9 +173,9 @@ class ERTFlixIE(ERTFlixBaseIE): }] def _extract_episode(self, episode): - codename = try_get(episode, lambda x: x['Codename'], compat_str) + codename = try_get(episode, lambda x: x['Codename'], str) title = episode.get('Title') - description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription', ))) + description = clean_html(dict_get(episode, ('ShortDescription', 'TinyDescription'))) if not codename or not title or not episode.get('HasPlayableStream', True): return thumbnail = next(( @@ -195,7 +194,7 @@ class ERTFlixIE(ERTFlixBaseIE): 'timestamp': parse_iso8601(episode.get('PublishDate')), 'duration': episode.get('DurationSeconds'), 'age_limit': self._parse_age_rating(episode), - 'url': 'ertflix:%s' % (codename, ), + 'url': f'ertflix:{codename}', } @staticmethod @@ -212,7 +211,7 @@ class ERTFlixIE(ERTFlixBaseIE): series_info = { 'age_limit': self._parse_age_rating(series), 'title': series.get('Title'), - 'description': dict_get(series, ('ShortDescription', 'TinyDescription', )), + 'description': dict_get(series, ('ShortDescription', 'TinyDescription')), } if season_numbers: season_titles = season_titles or [] @@ -281,7 +280,7 @@ class ERTWebtvEmbedIE(InfoExtractor): 'id': 'trailers/E2251_TO_DIKTYO_E09_16-01_1900.mp4', 'title': 'md5:914f06a73cd8b62fbcd6fb90c636e497', 'ext': 'mp4', - 'thumbnail': 'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg' + 'thumbnail': 
'https://program.ert.gr/photos/2022/1/to_diktio_ep09_i_istoria_tou_diadiktiou_stin_Ellada_1021x576.jpg', }, }] diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py index 7ed824c..4e9b635 100644 --- a/yt_dlp/extractor/espn.py +++ b/yt_dlp/extractor/espn.py @@ -100,13 +100,13 @@ class ESPNIE(OnceIE): }, { 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875', 'only_matching': True, - }, ] + }] def _real_extract(self, url): video_id = self._match_id(url) clip = self._download_json( - 'http://api-app.espn.com/v1/video/clips/%s' % video_id, + f'http://api-app.espn.com/v1/video/clips/{video_id}', video_id)['videos'][0] title = clip['headline'] @@ -115,16 +115,16 @@ class ESPNIE(OnceIE): formats = [] def traverse_source(source, base_source_id=None): - for source_id, source in source.items(): - if source_id == 'alert': + for src_id, src_item in source.items(): + if src_id == 'alert': continue - elif isinstance(source, str): - extract_source(source, base_source_id) - elif isinstance(source, dict): + elif isinstance(src_item, str): + extract_source(src_item, base_source_id) + elif isinstance(src_item, dict): traverse_source( - source, - '%s-%s' % (base_source_id, source_id) - if base_source_id else source_id) + src_item, + f'{base_source_id}-{src_id}' + if base_source_id else src_id) def extract_source(source_url, source_id=None): if source_url in format_urls: @@ -209,7 +209,7 @@ class ESPNArticleIE(InfoExtractor): webpage, 'video id', group='id') return self.url_result( - 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) + f'http://espn.go.com/video/clip?id={video_id}', ESPNIE.ie_key()) class FiveThirtyEightIE(InfoExtractor): @@ -251,7 +251,7 @@ class ESPNCricInfoIE(InfoExtractor): 'upload_date': '20211113', 'duration': 96, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 
'https://www.espncricinfo.com/cricket-videos/daryl-mitchell-mitchell-santner-is-one-of-the-best-white-ball-spinners-india-vs-new-zealand-1356225', 'info_dict': { @@ -266,12 +266,13 @@ class ESPNCricInfoIE(InfoExtractor): }] def _real_extract(self, url): - id = self._match_id(url) - data_json = self._download_json(f'https://hs-consumer-api.espncricinfo.com/v1/pages/video/video-details?videoId={id}', id)['video'] + video_id = self._match_id(url) + data_json = self._download_json( + f'https://hs-consumer-api.espncricinfo.com/v1/pages/video/video-details?videoId={video_id}', video_id)['video'] formats, subtitles = [], {} for item in data_json.get('playbacks') or []: if item.get('type') == 'HLS' and item.get('url'): - m3u8_frmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(item['url'], id) + m3u8_frmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(item['url'], video_id) formats.extend(m3u8_frmts) subtitles = self._merge_subtitles(subtitles, m3u8_subs) elif item.get('type') == 'AUDIO' and item.get('url'): @@ -280,7 +281,7 @@ class ESPNCricInfoIE(InfoExtractor): 'vcodec': 'none', }) return { - 'id': id, + 'id': video_id, 'title': data_json.get('title'), 'description': data_json.get('summary'), 'upload_date': unified_strdate(dict_get(data_json, ('publishedAt', 'recordedAt'))), @@ -366,28 +367,28 @@ class WatchESPNIE(AdobePassIE): 'subject_token': assertion, 'subject_token_type': 'urn:bamtech:params:oauth:token-type:device', 'platform': 'android', - 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange' + 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange', })['access_token'] assertion = self._call_bamgrid_api( 'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]}, headers={ 'Authorization': token, - 'Content-Type': 'application/json; charset=UTF-8' + 'Content-Type': 'application/json; charset=UTF-8', })['assertion'] token = self._call_bamgrid_api( 'token', video_id, payload={ 'subject_token': assertion, 
'subject_token_type': 'urn:bamtech:params:oauth:token-type:account', 'platform': 'android', - 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange' + 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange', })['access_token'] playback = self._download_json( video_data['videoHref'].format(scenario='browser~ssai'), video_id, headers={ 'Accept': 'application/vnd.media-service+json; version=5', - 'Authorization': token + 'Authorization': token, }) m3u8_url, headers = playback['stream']['complete'][0]['url'], {'authorization': token} diff --git a/yt_dlp/extractor/ettutv.py b/yt_dlp/extractor/ettutv.py index 133b525..7367177 100644 --- a/yt_dlp/extractor/ettutv.py +++ b/yt_dlp/extractor/ettutv.py @@ -56,5 +56,5 @@ class EttuTvIE(InfoExtractor): 'thumbnail': ('image', {url_or_none}), 'timestamp': ('date', {unified_timestamp}), 'is_live': ('isLivestream', {bool_or_none}), - }) + }), } diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py index 0cf889a..aa8baf2 100644 --- a/yt_dlp/extractor/europa.py +++ b/yt_dlp/extractor/europa.py @@ -28,7 +28,7 @@ class EuropaIE(InfoExtractor): 'duration': 34, 'view_count': int, 'formats': 'mincount:3', - } + }, }, { 'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786', 'only_matching': True, @@ -41,11 +41,11 @@ class EuropaIE(InfoExtractor): video_id = self._match_id(url) playlist = self._download_xml( - 'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id, video_id) + f'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID={video_id}', video_id) def get_item(type_, preference): items = {} - for item in playlist.findall('./info/%s/item' % type_): + for item in playlist.findall(f'./info/{type_}/item'): lang, label = xpath_text(item, 'lg', default=None), xpath_text(item, 'label', default=None) if lang and label: items[lang] = label.strip() @@ -77,7 +77,7 @@ class EuropaIE(InfoExtractor): 'url': video_url, 'format_id': lang, 'format_note': 
xpath_text(file_, './lglabel'), - 'language_preference': language_preference(lang) + 'language_preference': language_preference(lang), }) return { @@ -88,7 +88,7 @@ class EuropaIE(InfoExtractor): 'upload_date': upload_date, 'duration': duration, 'view_count': view_count, - 'formats': formats + 'formats': formats, } @@ -109,7 +109,7 @@ class EuroParlWebstreamIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { # live webstream 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/euroscola_20221115-1000-SPECIAL-EUROSCOLA', @@ -121,7 +121,7 @@ class EuroParlWebstreamIE(InfoExtractor): 'release_date': '20221115', 'live_status': 'is_live', }, - 'skip': 'not live anymore' + 'skip': 'not live anymore', }, { 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT', 'info_dict': { @@ -131,7 +131,7 @@ class EuroParlWebstreamIE(InfoExtractor): 'release_date': '20230301', 'title': 'Committee on Culture and Education', 'release_timestamp': 1677666641, - } + }, }, { # live stream 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-environment-public-health-and-food-safety_20230524-0900-COMMITTEE-ENVI', @@ -143,7 +143,7 @@ class EuroParlWebstreamIE(InfoExtractor): 'release_timestamp': 1684911541, 'live_status': 'is_live', }, - 'skip': 'Not live anymore' + 'skip': 'Not live anymore', }, { 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER', 'info_dict': { @@ -153,7 +153,7 @@ class EuroParlWebstreamIE(InfoExtractor): 'release_date': '20240320', 'title': 'md5:7c6c814cac55dea5e2d87bf8d3db2234', 'release_timestamp': 1710939767, - } + }, }, { 'url': 'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER', 'only_matching': True, @@ -170,7 +170,7 @@ class EuroParlWebstreamIE(InfoExtractor): query={ 'api-version': 1.0, 'tenantId': 
'bae646ca-1fc8-4363-80ba-2c04f06b4968', - 'externalReference': display_id + 'externalReference': display_id, }) formats, subtitles = [], {} @@ -186,5 +186,5 @@ class EuroParlWebstreamIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, 'release_timestamp': parse_iso8601(json_info.get('startDateTime')), - 'is_live': traverse_obj(webpage_nextjs, ('mediaItem', 'mediaSubType')) == 'Live' + 'is_live': traverse_obj(webpage_nextjs, ('mediaItem', 'mediaSubType')) == 'Live', } diff --git a/yt_dlp/extractor/europeantour.py b/yt_dlp/extractor/europeantour.py index 1995a74..a5503db 100644 --- a/yt_dlp/extractor/europeantour.py +++ b/yt_dlp/extractor/europeantour.py @@ -17,16 +17,16 @@ class EuropeanTourIE(InfoExtractor): 'uploader_id': '5136026580001', 'tags': ['prod-imported'], 'thumbnail': 'md5:fdac52bc826548860edf8145ee74e71a', - 'upload_date': '20211220' + 'upload_date': '20211220', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) vid, aid = re.search(r'(?s)brightcove-player\s?video-id="([^"]+)".*"ACCOUNT_ID":"([^"]+)"', webpage).groups() if not aid: aid = '5136026580001' diff --git a/yt_dlp/extractor/eurosport.py b/yt_dlp/extractor/eurosport.py index 6c426bb..0c5e123 100644 --- a/yt_dlp/extractor/eurosport.py +++ b/yt_dlp/extractor/eurosport.py @@ -16,7 +16,7 @@ class EurosportIE(InfoExtractor): 'display_id': 'vid1694147', 'timestamp': 1654446698, 'upload_date': '20220605', - } + }, }, { 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/watch-the-top-five-shots-from-men-s-final-as-rafael-nadal-beats-casper-ruud-to-seal-14th-french-open_vid1694283/video.shtml', 'info_dict': { @@ -29,7 +29,7 @@ class EurosportIE(InfoExtractor): 'display_id': 
'vid1694283', 'timestamp': 1654456090, 'upload_date': '20220605', - } + }, }, { # geo-fence but can bypassed by xff 'url': 'https://www.eurosport.com/cycling/tour-de-france-femmes/2022/incredible-ride-marlen-reusser-storms-to-stage-4-win-at-tour-de-france-femmes_vid1722221/video.shtml', @@ -43,7 +43,7 @@ class EurosportIE(InfoExtractor): 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/07/27/3423347-69852108-2560-1440.jpg', 'description': 'md5:32bbe3a773ac132c57fb1e8cca4b7c71', 'upload_date': '20220727', - } + }, }, { 'url': 'https://www.eurosport.com/football/champions-league/2022-2023/pep-guardiola-emotionally-destroyed-after-manchester-city-win-over-bayern-munich-in-champions-league_vid1896254/video.shtml', 'info_dict': { @@ -56,7 +56,7 @@ class EurosportIE(InfoExtractor): 'timestamp': 1681292028, 'upload_date': '20230412', 'display_id': 'vid1896254', - } + }, }, { 'url': 'https://www.eurosport.com/football/last-year-s-semi-final-pain-was-still-there-pep-guardiola-after-man-city-reach-cl-final_vid1914115/video.shtml', 'info_dict': { @@ -69,7 +69,7 @@ class EurosportIE(InfoExtractor): 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/05/18/3707254-75435008-2560-1440.jpg', 'duration': 105.0, 'upload_date': '20230518', - } + }, }] _TOKEN = None diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py index 66fa42f..f08938f 100644 --- a/yt_dlp/extractor/euscreen.py +++ b/yt_dlp/extractor/euscreen.py @@ -20,37 +20,37 @@ class EUScreenIE(InfoExtractor): 'series': 'JA2 DERNIERE', 'episode': '-', 'uploader': 'INA / France', - 'thumbnail': 'http://images3.noterik.com/domain/euscreenxl/user/eu_ina/video/EUS_0EBCBF356BFC4E12A014023BA41BD98C/image.jpg' + 'thumbnail': 'http://images3.noterik.com/domain/euscreenxl/user/eu_ina/video/EUS_0EBCBF356BFC4E12A014023BA41BD98C/image.jpg', }, - 'params': 
{'skip_download': True} + 'params': {'skip_download': True}, }] _payload = b'<fsxml><screen><properties><screenId>-1</screenId></properties><capabilities id="1"><properties><platform>Win32</platform><appcodename>Mozilla</appcodename><appname>Netscape</appname><appversion>5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36</appversion><useragent>Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36</useragent><cookiesenabled>true</cookiesenabled><screenwidth>784</screenwidth><screenheight>758</screenheight><orientation>undefined</orientation><smt_browserid>Sat, 07 Oct 2021 08:56:50 GMT</smt_browserid><smt_sessionid>1633769810758</smt_sessionid></properties></capabilities></screen></fsxml>' def _real_extract(self, url): - id = self._match_id(url) + video_id = self._match_id(url) args_for_js_request = self._download_webpage( 'https://euscreen.eu/lou/LouServlet/domain/euscreenxl/html5application/euscreenxlitem', - id, data=self._payload, query={'actionlist': 'itempage', 'id': id}) + video_id, data=self._payload, query={'actionlist': 'itempage', 'id': video_id}) info_js = self._download_webpage( 'https://euscreen.eu/lou/LouServlet/domain/euscreenxl/html5application/euscreenxlitem', - id, data=args_for_js_request.replace('screenid', 'screenId').encode()) + video_id, data=args_for_js_request.replace('screenid', 'screenId').encode()) video_json = self._parse_json( self._search_regex(r'setVideo\(({.+})\)\(\$end\$\)put', info_js, 'Video JSON'), - id, transform_source=js_to_json) + video_id, transform_source=js_to_json) meta_json = self._parse_json( self._search_regex(r'setData\(({.+})\)\(\$end\$\)', info_js, 'Metadata JSON'), - id, transform_source=js_to_json) + video_id, transform_source=js_to_json) formats = [{ 'url': source['src'], } for source in video_json.get('sources', [])] return { - 'id': id, + 'id': video_id, 'title': meta_json.get('originalTitle'), 
'alt_title': meta_json.get('title'), 'duration': parse_duration(meta_json.get('duration')), - 'description': '%s\n%s' % (meta_json.get('summaryOriginal', ''), meta_json.get('summaryEnglish', '')), + 'description': '{}\n{}'.format(meta_json.get('summaryOriginal', ''), meta_json.get('summaryEnglish', '')), 'series': meta_json.get('series') or meta_json.get('seriesEnglish'), 'episode': meta_json.get('episodeNumber'), 'uploader': meta_json.get('provider'), diff --git a/yt_dlp/extractor/expressen.py b/yt_dlp/extractor/expressen.py index b96f2e4..33b8298 100644 --- a/yt_dlp/extractor/expressen.py +++ b/yt_dlp/extractor/expressen.py @@ -58,7 +58,7 @@ class ExpressenIE(InfoExtractor): def extract_data(name): return self._parse_json( self._search_regex( - r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name, + rf'data-{name}=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'info', group='value'), display_id, transform_source=unescapeHTML) diff --git a/yt_dlp/extractor/eyedotv.py b/yt_dlp/extractor/eyedotv.py index 4a13ab0..b8c67ce 100644 --- a/yt_dlp/extractor/eyedotv.py +++ b/yt_dlp/extractor/eyedotv.py @@ -18,13 +18,13 @@ class EyedoTVIE(InfoExtractor): 'description': 'md5:4abe07293b2f73efc6e1c37028d58c98', 'uploader': 'Afnic Live', 'uploader_id': '8023', - } + }, } _ROOT_URL = 'http://live.eyedo.net:1935/' def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id) + video_data = self._download_xml(f'http://eyedo.tv/api/live/GetLive/{video_id}', video_id) def _add_ns(path): return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api') @@ -33,7 +33,7 @@ class EyedoTVIE(InfoExtractor): state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'title', True) if state_live_code == 'avenir': raise ExtractorError( - '%s said: We\'re sorry, but this video is not yet available.' 
% self.IE_NAME, + f'{self.IE_NAME} said: We\'re sorry, but this video is not yet available.', expected=True) is_live = state_live_code == 'live' @@ -41,11 +41,11 @@ class EyedoTVIE(InfoExtractor): # http://eyedo.tv/Content/Html5/Scripts/html5view.js if is_live: if xpath_text(video_data, 'Cdn') == 'true': - m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id + m3u8_url = f'http://rrr.sz.xlcdn.com/?account=eyedo&file=A{video_id}&type=live&service=wowza&protocol=http&output=playlist.m3u8' else: - m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id + m3u8_url = self._ROOT_URL + f'w/{video_id}/eyedo_720p/playlist.m3u8' else: - m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id) + m3u8_url = self._ROOT_URL + f'replay-w/{video_id}/mp4:{video_id}.mp4/playlist.m3u8' return { 'id': video_id, diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index b76407a..a3ca291 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -3,18 +3,13 @@ import re import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_etree_fromstring, - compat_str, - compat_urllib_parse_unquote, -) +from ..compat import compat_etree_fromstring from ..networking import Request from ..networking.exceptions import network_exceptions from ..utils import ( ExtractorError, clean_html, determine_ext, - error_to_compat_str, float_or_none, format_field, get_element_by_id, @@ -209,7 +204,7 @@ class FacebookIE(InfoExtractor): 'skip_download': True, }, }, { - # FIXME + # FIXME: Cannot parse data error 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471', 'info_dict': { 'id': '1072691702860471', @@ -415,7 +410,7 @@ class FacebookIE(InfoExtractor): }] _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)' _api_config = { - 'graphURI': 
'/api/graphql/' + 'graphURI': '/api/graphql/', } def _perform_login(self, username, password): @@ -450,7 +445,7 @@ class FacebookIE(InfoExtractor): r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>', login_results, 'login error', default=None, group='error') if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError(f'Unable to login: {error}', expected=True) self.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.') return @@ -474,7 +469,7 @@ class FacebookIE(InfoExtractor): if re.search(r'id="checkpointSubmitButton"', check_response) is not None: self.report_warning('Unable to confirm login, you have to login in your browser and authorize the login.') except network_exceptions as err: - self.report_warning('unable to log in: %s' % error_to_compat_str(err)) + self.report_warning(f'unable to log in: {err}') return def _extract_from_url(self, url, video_id): @@ -493,7 +488,7 @@ class FacebookIE(InfoExtractor): page_title = title or self._html_search_regex(( r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>', r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(?P<content>.*?)</span>', - self._meta_regex('og:title'), self._meta_regex('twitter:title'), r'<title>(?P<content>.+?)</title>' + self._meta_regex('og:title'), self._meta_regex('twitter:title'), r'<title>(?P<content>.+?)</title>', ), webpage, 'title', default=None, group='content') description = description or self._html_search_meta( ['description', 'og:description', 'twitter:description'], @@ -525,7 +520,7 @@ class FacebookIE(InfoExtractor): 'timestamp': timestamp, 'thumbnail': thumbnail, 'view_count': parse_count(self._search_regex( - (r'\bviewCount\s*:\s*["\']([\d,.]+)', r'video_view_count["\']\s*:\s*(\d+)',), + (r'\bviewCount\s*:\s*["\']([\d,.]+)', r'video_view_count["\']\s*:\s*(\d+)'), 
webpage, 'view count', default=None)), 'concurrent_view_count': get_first(post, ( ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})), @@ -578,7 +573,7 @@ class FacebookIE(InfoExtractor): def extract_relay_data(_filter): return self._parse_json(self._search_regex( - r'data-sjs>({.*?%s.*?})</script>' % _filter, + rf'data-sjs>({{.*?{_filter}.*?}})</script>', webpage, 'replay data', default='{}'), video_id, fatal=False) or {} def extract_relay_prefetched_data(_filter): @@ -590,7 +585,7 @@ class FacebookIE(InfoExtractor): if not video_data: server_js_data = self._parse_json(self._search_regex([ r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX, - r'bigPipe\.onPageletArrive\(({.*?id\s*:\s*"%s".*?})\);' % self._SUPPORTED_PAGLETS_REGEX + rf'bigPipe\.onPageletArrive\(({{.*?id\s*:\s*"{self._SUPPORTED_PAGLETS_REGEX}".*?}})\);', ], webpage, 'js data', default='{}'), video_id, js_to_json, False) video_data = extract_from_jsmods_instances(server_js_data) @@ -626,13 +621,16 @@ class FacebookIE(InfoExtractor): 'url': playable_url, }) extract_dash_manifest(video, formats) + if not formats: + # Do not append false positive entry w/o any formats + return automatic_captions, subtitles = {}, {} is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool})) for caption in traverse_obj(video, ( 'video_available_captions_locales', {lambda x: sorted(x, key=lambda c: c['locale'])}, - lambda _, v: url_or_none(v['captions_url']) + lambda _, v: url_or_none(v['captions_url']), )): lang = caption.get('localized_language') or 'und' subs = { @@ -670,7 +668,7 @@ class FacebookIE(InfoExtractor): 'description': description, }) else: - info['title'] = description or 'Facebook video #%s' % v_id + info['title'] = description or f'Facebook video #{v_id}' entries.append(info) def parse_attachment(attachment, key='media'): @@ -699,7 +697,7 @@ class FacebookIE(InfoExtractor): if video: attachments = 
try_get(video, [ lambda x: x['story']['attachments'], - lambda x: x['creation_story']['attachments'] + lambda x: x['creation_story']['attachments'], ], list) or [] for attachment in attachments: parse_attachment(attachment) @@ -723,7 +721,7 @@ class FacebookIE(InfoExtractor): m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) if m_msg is not None: raise ExtractorError( - 'The video is not available, Facebook said: "%s"' % m_msg.group(1), + f'The video is not available, Facebook said: "{m_msg.group(1)}"', expected=True) elif any(p in webpage for p in ( '>You must log in to continue', @@ -760,7 +758,7 @@ class FacebookIE(InfoExtractor): v_id = video.get('id') if not v_id: continue - v_id = compat_str(v_id) + v_id = str(v_id) entries.append(self.url_result( self._VIDEO_PAGE_TEMPLATE % v_id, self.ie_key(), v_id, video.get('name'))) @@ -818,7 +816,7 @@ class FacebookIE(InfoExtractor): continue for quality in ('sd', 'hd'): for src_type in ('src', 'src_no_ratelimit'): - src = f[0].get('%s_%s' % (quality, src_type)) + src = f[0].get(f'{quality}_{src_type}') if src: # sd, hd formats w/o resolution info should be deprioritized below DASH # TODO: investigate if progressive or src formats still exist @@ -826,10 +824,10 @@ class FacebookIE(InfoExtractor): if quality == 'hd': preference += 1 formats.append({ - 'format_id': '%s_%s_%s' % (format_id, quality, src_type), + 'format_id': f'{format_id}_{quality}_{src_type}', 'url': src, 'quality': preference, - 'height': 720 if quality == 'hd' else None + 'height': 720 if quality == 'hd' else None, }) extract_dash_manifest(f[0], formats) subtitles_src = f[0].get('subtitles_src') @@ -879,7 +877,7 @@ class FacebookPluginsVideoIE(InfoExtractor): def _real_extract(self, url): return self.url_result( - compat_urllib_parse_unquote(self._match_id(url)), + urllib.parse.unquote(self._match_id(url)), FacebookIE.ie_key()) @@ -940,7 +938,7 @@ class FacebookReelIE(InfoExtractor): 'timestamp': 1637502609, 
'upload_date': '20211121', 'thumbnail': r're:^https?://.*', - } + }, }] def _real_extract(self, url): @@ -966,7 +964,7 @@ class FacebookAdsIE(InfoExtractor): 'thumbnail': r're:^https?://.*', 'upload_date': '20231214', 'like_count': int, - } + }, }, { 'url': 'https://www.facebook.com/ads/library/?id=893637265423481', 'info_dict': { @@ -998,7 +996,7 @@ class FacebookAdsIE(InfoExtractor): def _extract_formats(self, video_dict): formats = [] for format_key, format_url in traverse_obj(video_dict, ( - {dict.items}, lambda _, v: v[0] in self._FORMATS_MAP and url_or_none(v[1]) + {dict.items}, lambda _, v: v[0] in self._FORMATS_MAP and url_or_none(v[1]), )): formats.append({ 'format_id': self._FORMATS_MAP[format_key][0], @@ -1035,7 +1033,7 @@ class FacebookAdsIE(InfoExtractor): entries = [] for idx, entry in enumerate(traverse_obj( - data, (('videos', 'cards'), lambda _, v: any([url_or_none(v[f]) for f in self._FORMATS_MAP]))), 1 + data, (('videos', 'cards'), lambda _, v: any(url_or_none(v[f]) for f in self._FORMATS_MAP))), 1, ): entries.append({ 'id': f'{video_id}_{idx}', diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py index 1e80f9a..1b1ed39 100644 --- a/yt_dlp/extractor/fancode.py +++ b/yt_dlp/extractor/fancode.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ExtractorError, mimetype2ext, parse_iso8601, try_get @@ -19,12 +18,12 @@ class FancodeVodIE(InfoExtractor): 'ext': 'mp4', 'title': 'Match Preview: PBKS vs MI', 'thumbnail': r're:^https?://.*\.jpg$', - "timestamp": 1619081590, + 'timestamp': 1619081590, 'view_count': int, 'like_count': int, 'upload_date': '20210422', - 'uploader_id': '6008340455001' - } + 'uploader_id': '6008340455001', + }, }, { 'url': 'https://fancode.com/video/15043', 'only_matching': True, @@ -53,14 +52,14 @@ class FancodeVodIE(InfoExtractor): "refreshToken":"%s" }, "operationName":"RefreshToken" - }''' % password + }''' % password # noqa: UP031 - token_json = 
self.download_gql('refresh token', data, "Getting the Access token") + token_json = self.download_gql('refresh token', data, 'Getting the Access token') self._ACCESS_TOKEN = try_get(token_json, lambda x: x['data']['refreshToken']['accessToken']) if self._ACCESS_TOKEN is None: self.report_warning('Failed to get Access token') else: - self.headers.update({'Authorization': 'Bearer %s' % self._ACCESS_TOKEN}) + self.headers.update({'Authorization': f'Bearer {self._ACCESS_TOKEN}'}) def _check_login_required(self, is_available, is_premium): msg = None @@ -92,12 +91,12 @@ class FancodeVodIE(InfoExtractor): } }, "operationName":"Video" - }''' % video_id + }''' % video_id # noqa: UP031 metadata_json = self.download_gql(video_id, data, note='Downloading metadata') media = try_get(metadata_json, lambda x: x['data']['media'], dict) or {} - brightcove_video_id = try_get(media, lambda x: x['mediaSource']['brightcove'], compat_str) + brightcove_video_id = try_get(media, lambda x: x['mediaSource']['brightcove'], str) if brightcove_video_id is None: raise ExtractorError('Unable to extract brightcove Video ID') @@ -132,11 +131,11 @@ class FancodeLiveIE(FancodeVodIE): # XXX: Do not subclass from concrete IE 'id': '35328', 'ext': 'mp4', 'title': 'BUB vs BLB', - "timestamp": 1624863600, + 'timestamp': 1624863600, 'is_live': True, 'upload_date': '20210628', }, - 'skip': 'Ended' + 'skip': 'Ended', }, { 'url': 'https://fancode.com/match/35328/', 'only_matching': True, @@ -147,7 +146,7 @@ class FancodeLiveIE(FancodeVodIE): # XXX: Do not subclass from concrete IE def _real_extract(self, url): - id = self._match_id(url) + video_id = self._match_id(url) data = '''{ "query":"query MatchResponse($id: Int\\u0021, $isLoggedIn: Boolean\\u0021) { match: matchWithScores(id: $id) { id matchDesc mediaId videoStreamId videoStreamUrl { ...VideoSource } liveStreams { videoStreamId videoStreamUrl { ...VideoSource } contentId } name startTime streamingStatus isPremium isUserEntitled @include(if: 
$isLoggedIn) status metaTags bgImage { src } sport { name slug } tour { id name } squads { name shortName } liveStreams { contentId } mediaId }}fragment VideoSource on VideoSource { title description posterUrl url deliveryType playerType}", "variables":{ @@ -155,21 +154,21 @@ class FancodeLiveIE(FancodeVodIE): # XXX: Do not subclass from concrete IE "isLoggedIn":true }, "operationName":"MatchResponse" - }''' % id + }''' % video_id # noqa: UP031 - info_json = self.download_gql(id, data, "Info json") + info_json = self.download_gql(video_id, data, 'Info json') match_info = try_get(info_json, lambda x: x['data']['match']) - if match_info.get('streamingStatus') != "STARTED": + if match_info.get('streamingStatus') != 'STARTED': raise ExtractorError('The stream can\'t be accessed', expected=True) self._check_login_required(match_info.get('isUserEntitled'), True) # all live streams are premium only return { - 'id': id, + 'id': video_id, 'title': match_info.get('name'), - 'formats': self._extract_akamai_formats(try_get(match_info, lambda x: x['videoStreamUrl']['url']), id), + 'formats': self._extract_akamai_formats(try_get(match_info, lambda x: x['videoStreamUrl']['url']), video_id), 'ext': mimetype2ext(try_get(match_info, lambda x: x['videoStreamUrl']['deliveryType'])), 'is_live': True, - 'release_timestamp': parse_iso8601(match_info.get('startTime')) + 'release_timestamp': parse_iso8601(match_info.get('startTime')), } diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py index bbc4b56..eac70f6 100644 --- a/yt_dlp/extractor/fc2.py +++ b/yt_dlp/extractor/fc2.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_parse_qs from ..networking import Request from ..utils import ( ExtractorError, @@ -92,7 +92,7 @@ class FC2IE(InfoExtractor): description = self._og_search_description(webpage, default=None) vidplaylist = self._download_json( - 'https://video.fc2.com/api/v3/videoplaylist/%s?sh=1&fs=0' % video_id, 
video_id, + f'https://video.fc2.com/api/v3/videoplaylist/{video_id}?sh=1&fs=0', video_id, note='Downloading info page') vid_url = traverse_obj(vidplaylist, ('playlist', 'nq')) if not vid_url: @@ -127,22 +127,22 @@ class FC2EmbedIE(InfoExtractor): def _real_extract(self, url): mobj = self._match_valid_url(url) - query = compat_parse_qs(mobj.group('query')) + query = urllib.parse.parse_qs(mobj.group('query')) video_id = query['i'][-1] - title = query.get('tl', ['FC2 video %s' % video_id])[0] + title = query.get('tl', [f'FC2 video {video_id}'])[0] sj = query.get('sj', [None])[0] thumbnail = None if sj: # See thumbnailImagePath() in ServerConst.as of flv2.swf - thumbnail = 'http://video%s-thumbnail.fc2.com/up/pic/%s.jpg' % ( + thumbnail = 'http://video{}-thumbnail.fc2.com/up/pic/{}.jpg'.format( sj, '/'.join((video_id[:6], video_id[6:8], video_id[-2], video_id[-1], video_id))) return { '_type': 'url_transparent', 'ie_key': FC2IE.ie_key(), - 'url': 'fc2:%s' % video_id, + 'url': f'fc2:{video_id}', 'title': title, 'thumbnail': thumbnail, } @@ -166,7 +166,7 @@ class FC2LiveIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id) + webpage = self._download_webpage(f'https://live.fc2.com/{video_id}/', video_id) self._set_cookie('live.fc2.com', 'js-player_size', '1') @@ -175,7 +175,7 @@ class FC2LiveIE(InfoExtractor): 'channel': '1', 'profile': '1', 'user': '1', - 'streamid': video_id + 'streamid': video_id, }), note='Requesting member info') control_server = self._download_json( @@ -224,7 +224,7 @@ class FC2LiveIE(InfoExtractor): self.write_debug('Goodbye') playlist_data = data break - self.write_debug('Server said: %s%s' % (recv[:100], '...' if len(recv) > 100 else '')) + self.write_debug('Server said: {}{}'.format(recv[:100], '...' 
if len(recv) > 100 else '')) if not playlist_data: raise ExtractorError('Unable to fetch HLS playlist info via WebSocket') diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py index 69ca87c..af1de7a 100644 --- a/yt_dlp/extractor/filmon.py +++ b/yt_dlp/extractor/filmon.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -35,12 +34,12 @@ class FilmOnIE(InfoExtractor): try: response = self._download_json( - 'https://www.filmon.com/api/vod/movie?id=%s' % video_id, + f'https://www.filmon.com/api/vod/movie?id={video_id}', video_id)['response'] except ExtractorError as e: if isinstance(e.cause, HTTPError): errmsg = self._parse_json(e.cause.response.read().decode(), video_id)['reason'] - raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {errmsg}', expected=True) raise title = response['title'] @@ -124,10 +123,10 @@ class FilmOnChannelIE(InfoExtractor): except ExtractorError as e: if isinstance(e.cause, HTTPError): errmsg = self._parse_json(e.cause.response.read().decode(), channel_id)['message'] - raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {errmsg}', expected=True) raise - channel_id = compat_str(channel_data['id']) + channel_id = str(channel_data['id']) is_live = not channel_data.get('is_vod') and not channel_data.get('is_vox') title = channel_data['title'] @@ -155,7 +154,7 @@ class FilmOnChannelIE(InfoExtractor): for name, width, height in self._THUMBNAIL_RES: thumbnails.append({ 'id': name, - 'url': 'http://static.filmon.com/assets/channels/%s/%s.png' % (channel_id, name), + 'url': f'http://static.filmon.com/assets/channels/{channel_id}/{name}.png', 'width': width, 'height': height, }) diff --git a/yt_dlp/extractor/filmweb.py b/yt_dlp/extractor/filmweb.py index cfea1f2..6dde6c3 100644 
--- a/yt_dlp/extractor/filmweb.py +++ b/yt_dlp/extractor/filmweb.py @@ -14,7 +14,7 @@ class FilmwebIE(InfoExtractor): 'timestamp': 1458140101, 'uploader_id': '12639966', 'uploader': 'Live Roaldset', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/firsttv.py b/yt_dlp/extractor/firsttv.py index f74bd13..2d47ee5 100644 --- a/yt_dlp/extractor/firsttv.py +++ b/yt_dlp/extractor/firsttv.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..utils import ( int_or_none, qualities, @@ -60,12 +58,12 @@ class FirstTVIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - playlist_url = compat_urlparse.urljoin(url, self._search_regex( + playlist_url = urllib.parse.urljoin(url, self._search_regex( r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'playlist url', group='url')) - parsed_url = compat_urlparse.urlparse(playlist_url) - qs = compat_urlparse.parse_qs(parsed_url.query) + parsed_url = urllib.parse.urlparse(playlist_url) + qs = urllib.parse.parse_qs(parsed_url.query) item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]') items = self._download_json(playlist_url, display_id) @@ -73,12 +71,12 @@ class FirstTVIE(InfoExtractor): if item_ids: items = [ item for item in items - if item.get('uid') and compat_str(item['uid']) in item_ids] + if item.get('uid') and str(item['uid']) in item_ids] else: items = [items[0]] entries = [] - QUALITIES = ('ld', 'sd', 'hd', ) + QUALITIES = ('ld', 'sd', 'hd') for item in items: title = item['title'] @@ -116,11 +114,10 @@ class FirstTVIE(InfoExtractor): if len(formats) == 1: m3u8_path = ',' else: - tbrs = [compat_str(t) for t in sorted(f['tbr'] for f in formats)] - m3u8_path = '_,%s,%s' % (','.join(tbrs), '.mp4') + tbrs = [str(t) for t in sorted(f['tbr'] for f in formats)] + m3u8_path = '_,{},{}'.format(','.join(tbrs), '.mp4') formats.extend(self._extract_m3u8_formats( - 
'http://balancer-vod.1tv.ru/%s%s.urlset/master.m3u8' - % (path, m3u8_path), + f'http://balancer-vod.1tv.ru/{path}{m3u8_path}.urlset/master.m3u8', display_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) @@ -131,12 +128,12 @@ class FirstTVIE(InfoExtractor): 'ya:ovs:upload_date', webpage, 'upload date', default=None)) entries.append({ - 'id': compat_str(item.get('id') or item['uid']), + 'id': str(item.get('id') or item['uid']), 'thumbnail': thumbnail, 'title': title, 'upload_date': upload_date, 'duration': int_or_none(duration), - 'formats': formats + 'formats': formats, }) title = self._html_search_regex( diff --git a/yt_dlp/extractor/flickr.py b/yt_dlp/extractor/flickr.py index 89a40d7..507bfe9 100644 --- a/yt_dlp/extractor/flickr.py +++ b/yt_dlp/extractor/flickr.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) from ..utils import ( ExtractorError, format_field, @@ -31,7 +29,7 @@ class FlickrIE(InfoExtractor): 'view_count': int, 'tags': list, 'license': 'Attribution-ShareAlike', - } + }, } _API_BASE_URL = 'https://api.flickr.com/services/rest?' 
# https://help.yahoo.com/kb/flickr/SLN25525.html @@ -52,14 +50,14 @@ class FlickrIE(InfoExtractor): def _call_api(self, method, video_id, api_key, note, secret=None): query = { 'photo_id': video_id, - 'method': 'flickr.%s' % method, + 'method': f'flickr.{method}', 'api_key': api_key, 'format': 'json', 'nojsoncallback': 1, } if secret: query['secret'] = secret - data = self._download_json(self._API_BASE_URL + compat_urllib_parse_urlencode(query), video_id, note) + data = self._download_json(self._API_BASE_URL + urllib.parse.urlencode(query), video_id, note) if data['stat'] != 'ok': raise ExtractorError(data['message']) return data @@ -83,7 +81,7 @@ class FlickrIE(InfoExtractor): formats = [] for stream in streams['stream']: - stream_type = compat_str(stream.get('type')) + stream_type = str(stream.get('type')) formats.append({ 'format_id': stream_type, 'url': stream['_content'], diff --git a/yt_dlp/extractor/floatplane.py b/yt_dlp/extractor/floatplane.py index 8676d73..b7ee160 100644 --- a/yt_dlp/extractor/floatplane.py +++ b/yt_dlp/extractor/floatplane.py @@ -211,7 +211,7 @@ class FloatplaneIE(InfoExtractor): stream = self._download_json( 'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={ 'type': 'vod' if media_typ == 'video' else 'aod', - 'guid': metadata['guid'] + 'guid': metadata['guid'], }, note=f'Downloading {media_typ} stream data') path_template = traverse_obj(stream, ('resource', 'uri', {str})) diff --git a/yt_dlp/extractor/folketinget.py b/yt_dlp/extractor/folketinget.py index 55a11e5..9509570 100644 --- a/yt_dlp/extractor/folketinget.py +++ b/yt_dlp/extractor/folketinget.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_parse_qs from ..utils import ( int_or_none, parse_duration, @@ -42,7 +43,7 @@ class FolketingetIE(InfoExtractor): r'(?s)<div class="video-item-agenda"[^>]*>(.*?)<', webpage, 'description', fatal=False) - player_params = compat_parse_qs(self._search_regex( + 
player_params = urllib.parse.parse_qs(self._search_regex( r'<embed src="http://ft\.arkena\.tv/flash/ftplayer\.swf\?([^"]+)"', webpage, 'player params')) xml_url = player_params['xml'][0] diff --git a/yt_dlp/extractor/footyroom.py b/yt_dlp/extractor/footyroom.py index 4a1316b..797e894 100644 --- a/yt_dlp/extractor/footyroom.py +++ b/yt_dlp/extractor/footyroom.py @@ -19,7 +19,7 @@ class FootyRoomIE(InfoExtractor): 'title': 'VIDEO Georgia 0 - 2 Germany', }, 'playlist_count': 1, - 'add_ie': ['Playwire'] + 'add_ie': ['Playwire'], }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/fourtube.py b/yt_dlp/extractor/fourtube.py index b6368b8..ba94b5b 100644 --- a/yt_dlp/extractor/fourtube.py +++ b/yt_dlp/extractor/fourtube.py @@ -1,12 +1,8 @@ +import base64 import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_str, - compat_urllib_parse_unquote, - compat_urlparse, -) from ..utils import ( int_or_none, parse_duration, @@ -21,21 +17,20 @@ from ..utils import ( class FourTubeBaseIE(InfoExtractor): def _extract_formats(self, url, video_id, media_id, sources): - token_url = 'https://%s/%s/desktop/%s' % ( + token_url = 'https://{}/{}/desktop/{}'.format( self._TKN_HOST, media_id, '+'.join(sources)) - parsed_url = compat_urlparse.urlparse(url) + parsed_url = urllib.parse.urlparse(url) tokens = self._download_json(token_url, video_id, data=b'', headers={ - 'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname), + 'Origin': f'{parsed_url.scheme}://{parsed_url.hostname}', 'Referer': url, }) - formats = [{ - 'url': tokens[format]['token'], - 'format_id': format + 'p', - 'resolution': format + 'p', - 'quality': int(format), - } for format in sources] - return formats + return [{ + 'url': tokens[res]['token'], + 'format_id': res + 'p', + 'resolution': res + 'p', + 'quality': int(res), + } for res in sources] def _real_extract(self, url): mobj = self._match_valid_url(url) @@ -89,9 +84,9 @@ class 
FourTubeBaseIE(InfoExtractor): params_js = self._search_regex( r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)', player_js, 'initialization parameters') - params = self._parse_json('[%s]' % params_js, video_id) + params = self._parse_json(f'[{params_js}]', video_id) media_id = params[0] - sources = ['%s' % p for p in params[2]] + sources = [f'{p}' for p in params[2]] formats = self._extract_formats(url, video_id, media_id, sources) @@ -234,20 +229,20 @@ class PornTubeIE(FourTubeBaseIE): self._search_regex( r'INITIALSTATE\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'data', group='value'), video_id, - transform_source=lambda x: compat_urllib_parse_unquote( - compat_b64decode(x).decode('utf-8')))['page']['video'] + transform_source=lambda x: urllib.parse.unquote( + base64.b64decode(x).decode('utf-8')))['page']['video'] title = video['title'] media_id = video['mediaId'] - sources = [compat_str(e['height']) + sources = [str(e['height']) for e in video['encodings'] if e.get('height')] formats = self._extract_formats(url, video_id, media_id, sources) thumbnail = url_or_none(video.get('masterThumb')) - uploader = try_get(video, lambda x: x['user']['username'], compat_str) + uploader = try_get(video, lambda x: x['user']['username'], str) uploader_id = str_or_none(try_get( video, lambda x: x['user']['id'], int)) - channel = try_get(video, lambda x: x['channel']['name'], compat_str) + channel = try_get(video, lambda x: x['channel']['name'], str) channel_id = str_or_none(try_get( video, lambda x: x['channel']['id'], int)) like_count = int_or_none(video.get('likes')) diff --git a/yt_dlp/extractor/fox.py b/yt_dlp/extractor/fox.py index e00e977..e3cf22d 100644 --- a/yt_dlp/extractor/fox.py +++ b/yt_dlp/extractor/fox.py @@ -1,11 +1,8 @@ import json +import urllib.parse import uuid from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, -) from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, 
@@ -59,7 +56,7 @@ class FOXIE(InfoExtractor): _HOME_PAGE_URL = 'https://www.fox.com/' _API_KEY = '6E9S4bmcoNnZwVLOHywOv8PJEdu76cM9' _access_token = None - _device_id = compat_str(uuid.uuid4()) + _device_id = str(uuid.uuid4()) def _call_api(self, path, video_id, data=None): headers = { @@ -88,7 +85,7 @@ class FOXIE(InfoExtractor): if not self._access_token: mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth') if mvpd_auth: - self._access_token = (self._parse_json(compat_urllib_parse_unquote( + self._access_token = (self._parse_json(urllib.parse.unquote( mvpd_auth.value), None, fatal=False) or {}).get('accessToken') if not self._access_token: self._access_token = self._call_api( @@ -100,7 +97,7 @@ class FOXIE(InfoExtractor): video_id = self._match_id(url) self._access_token = self._call_api( - 'previewpassmvpd?device_id=%s&mvpd_id=TempPass_fbcfox_60min' % self._device_id, + f'previewpassmvpd?device_id={self._device_id}&mvpd_id=TempPass_fbcfox_60min', video_id)['accessToken'] video = self._call_api('watch', video_id, data=json.dumps({ @@ -113,13 +110,13 @@ class FOXIE(InfoExtractor): 'provider': { 'freewheel': {'did': self._device_id}, 'vdms': {'rays': ''}, - 'dmp': {'kuid': '', 'seg': ''} + 'dmp': {'kuid': '', 'seg': ''}, }, 'playlist': '', 'privacy': {'us': '1---'}, 'siteSection': '', 'streamType': 'vod', - 'streamId': video_id}).encode('utf-8')) + 'streamId': video_id}).encode()) title = video['name'] release_url = video['url'] diff --git a/yt_dlp/extractor/fptplay.py b/yt_dlp/extractor/fptplay.py index 85613ba..db9b2e1 100644 --- a/yt_dlp/extractor/fptplay.py +++ b/yt_dlp/extractor/fptplay.py @@ -84,7 +84,7 @@ class FptplayIE(InfoExtractor): a = [0, 0, 0, 0] s = len(e) c = 0 - for z in range(s, 0, -1): + for _ in range(s, 0, -1): if n <= 3: i[n] = e[c] n += 1 diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 7b8f7dd..ab08f1c 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -5,6 +5,7 @@ from 
.common import InfoExtractor from .dailymotion import DailymotionIE from ..networking import HEADRequest from ..utils import ( + clean_html, determine_ext, filter_dict, format_field, @@ -33,6 +34,7 @@ class FranceTVIE(InfoExtractor): _GEO_BYPASS = False _TESTS = [{ + # tokenized url is in dinfo['video']['token'] 'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1', 'info_dict': { 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', @@ -45,6 +47,19 @@ class FranceTVIE(InfoExtractor): }, 'params': {'skip_download': 'm3u8'}, }, { + # tokenized url is in dinfo['video']['token']['akamai'] + 'url': 'francetv:c5bda21d-2c6f-4470-8849-3d8327adb2ba', + 'info_dict': { + 'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba', + 'ext': 'mp4', + 'title': '13h15, le dimanche... - Les mystères de Jésus', + 'timestamp': 1514118300, + 'duration': 2880, + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20171224', + }, + 'params': {'skip_download': 'm3u8'}, + }, { 'url': 'francetv:162311093', 'only_matching': True, }, { @@ -68,6 +83,7 @@ class FranceTVIE(InfoExtractor): def _extract_video(self, video_id, hostname=None): is_live = None videos = [] + drm_formats = False title = None subtitle = None episode_number = None @@ -85,13 +101,12 @@ class FranceTVIE(InfoExtractor): 'device_type': device_type, 'browser': browser, 'domain': hostname, - }), fatal=False) + }), fatal=False, expected_status=422) # 422 json gives detailed error code/message if not dinfo: continue - video = traverse_obj(dinfo, ('video', {dict})) - if video: + if video := traverse_obj(dinfo, ('video', {dict})): videos.append(video) if duration is None: duration = video.get('duration') @@ -99,9 +114,19 @@ class FranceTVIE(InfoExtractor): is_live = video.get('is_live') if spritesheets is None: spritesheets = video.get('spritesheets') + elif code := traverse_obj(dinfo, ('code', {int})): + if code == 2009: + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + elif code in (2015, 2017): + # 2015: L'accès à cette vidéo est 
impossible. (DRM-only) + # 2017: Cette vidéo n'est pas disponible depuis le site web mobile (b/c DRM) + drm_formats = True + continue + self.report_warning( + f'{self.IE_NAME} said: {code} "{clean_html(dinfo.get("message"))}"') + continue - meta = traverse_obj(dinfo, ('meta', {dict})) - if meta: + if meta := traverse_obj(dinfo, ('meta', {dict})): if title is None: title = meta.get('title') # meta['pre_title'] contains season and episode number for series in format "S<ID> E<ID>" @@ -114,12 +139,15 @@ class FranceTVIE(InfoExtractor): if timestamp is None: timestamp = parse_iso8601(meta.get('broadcasted_at')) + if not videos and drm_formats: + self.report_drm(video_id) + formats, subtitles, video_url = [], {}, None for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])): video_url = video['url'] format_id = video.get('format') - if token_url := url_or_none(video.get('token')): + if token_url := traverse_obj(video, ('token', (None, 'akamai'), {url_or_none}, any)): tokenized_url = traverse_obj(self._download_json( token_url, video_id, f'Downloading signed {format_id} manifest URL', fatal=False, query={ @@ -175,7 +203,7 @@ class FranceTVIE(InfoExtractor): for f in formats: if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'): f['language_preference'] = -10 - f['format_note'] = 'audio description%s' % format_field(f, 'format_note', ', %s') + f['format_note'] = 'audio description{}'.format(format_field(f, 'format_note', ', %s')) if spritesheets: formats.append({ @@ -189,10 +217,10 @@ class FranceTVIE(InfoExtractor): 'fragments': [{ 'url': sheet, # XXX: not entirely accurate; each spritesheet seems to be - # a 10×10 grid of thumbnails corresponding to approximately + # a 10x10 grid of thumbnails corresponding to approximately # 2 seconds of the video; the last spritesheet may be shorter 'duration': 200, - } for sheet in traverse_obj(spritesheets, (..., {url_or_none}))] + } for sheet in traverse_obj(spritesheets, (..., {url_or_none}))], }) 
return { @@ -225,13 +253,13 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', + 'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba', 'ext': 'mp4', 'title': '13h15, le dimanche... - Les mystères de Jésus', - 'timestamp': 1502623500, - 'duration': 2580, + 'timestamp': 1514118300, + 'duration': 2880, 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20170813', + 'upload_date': '20171224', }, 'params': { 'skip_download': True, diff --git a/yt_dlp/extractor/freesound.py b/yt_dlp/extractor/freesound.py index fcde044..b8b8fe8 100644 --- a/yt_dlp/extractor/freesound.py +++ b/yt_dlp/extractor/freesound.py @@ -23,7 +23,7 @@ class FreesoundIE(InfoExtractor): 'uploader': 'miklovan', 'upload_date': '20130715', 'tags': list, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/freetv.py b/yt_dlp/extractor/freetv.py index 757a10d..518a668 100644 --- a/yt_dlp/extractor/freetv.py +++ b/yt_dlp/extractor/freetv.py @@ -23,7 +23,7 @@ class FreeTvMoviesIE(FreeTvBaseIE): 'title': 'Atrápame Si Puedes', 'description': 'md5:ca63bc00898aeb2f64ec87c6d3a5b982', 'ext': 'mp4', - } + }, }, { 'url': 'https://www.freetv.com/peliculas/monstruoso/', 'md5': '509c15c68de41cb708d1f92d071f20aa', @@ -32,7 +32,7 @@ class FreeTvMoviesIE(FreeTvBaseIE): 'title': 'Monstruoso', 'description': 'md5:333fc19ee327b457b980e54a911ea4a3', 'ext': 'mp4', - } + }, }] def _extract_video(self, content_id, action='olyott_video_play'): @@ -72,7 +72,7 @@ class FreeTvIE(FreeTvBaseIE): 'info_dict': { 'id': 'el-detective-l', 'title': 'El Detective L', - 'description': 'md5:f9f1143bc33e9856ecbfcbfb97a759be' + 'description': 'md5:f9f1143bc33e9856ecbfcbfb97a759be', }, 'playlist_count': 24, }, { @@ -80,7 +80,7 @@ class FreeTvIE(FreeTvBaseIE): 'info_dict': { 'id': 'esmeraldas', 'title': 'Esmeraldas', - 'description': 
'md5:43d7ec45bd931d8268a4f5afaf4c77bf' + 'description': 'md5:43d7ec45bd931d8268a4f5afaf4c77bf', }, 'playlist_count': 62, }, { @@ -88,7 +88,7 @@ class FreeTvIE(FreeTvBaseIE): 'info_dict': { 'id': 'las-aventuras-de-leonardo', 'title': 'Las Aventuras de Leonardo', - 'description': 'md5:0c47130846c141120a382aca059288f6' + 'description': 'md5:0c47130846c141120a382aca059288f6', }, 'playlist_count': 13, }, diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py index 3bae8ad..b5176aa 100644 --- a/yt_dlp/extractor/frontendmasters.py +++ b/yt_dlp/extractor/frontendmasters.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..utils import ( ExtractorError, parse_duration, @@ -22,7 +19,7 @@ class FrontendMastersBaseIE(InfoExtractor): _QUALITIES = { 'low': {'width': 480, 'height': 360}, 'mid': {'width': 1280, 'height': 720}, - 'high': {'width': 1920, 'height': 1080} + 'high': {'width': 1920, 'height': 1080}, } def _perform_login(self, username, password): @@ -33,7 +30,7 @@ class FrontendMastersBaseIE(InfoExtractor): login_form.update({ 'username': username, - 'password': password + 'password': password, }) post_url = self._search_regex( @@ -41,7 +38,7 @@ class FrontendMastersBaseIE(InfoExtractor): 'post_url', default=self._LOGIN_URL, group='url') if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( post_url, None, 'Logging in', data=urlencode_postdata(login_form), @@ -56,14 +53,14 @@ class FrontendMastersBaseIE(InfoExtractor): r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<', response, 'error message', default=None, group='error') if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError(f'Unable to login: {error}', expected=True) 
raise ExtractorError('Unable to log in') class FrontendMastersPageBaseIE(FrontendMastersBaseIE): def _download_course(self, course_name, url): return self._download_json( - '%s/courses/%s' % (self._API_BASE, course_name), course_name, + f'{self._API_BASE}/courses/{course_name}', course_name, 'Downloading course JSON', headers={'Referer': url}) @staticmethod @@ -92,7 +89,7 @@ class FrontendMastersPageBaseIE(FrontendMastersBaseIE): duration = None timestamp = lesson.get('timestamp') - if isinstance(timestamp, compat_str): + if isinstance(timestamp, str): mobj = re.search( r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})', timestamp) @@ -102,7 +99,7 @@ class FrontendMastersPageBaseIE(FrontendMastersBaseIE): return { '_type': 'url_transparent', - 'url': 'frontendmasters:%s' % lesson_id, + 'url': f'frontendmasters:{lesson_id}', 'ie_key': FrontendMastersIE.ie_key(), 'id': lesson_id, 'display_id': display_id, @@ -134,16 +131,16 @@ class FrontendMastersIE(FrontendMastersBaseIE): def _real_extract(self, url): lesson_id = self._match_id(url) - source_url = '%s/video/%s/source' % (self._API_BASE, lesson_id) + source_url = f'{self._API_BASE}/video/{lesson_id}/source' formats = [] for ext in ('webm', 'mp4'): for quality in ('low', 'mid', 'high'): resolution = self._QUALITIES[quality].copy() - format_id = '%s-%s' % (ext, quality) + format_id = f'{ext}-{quality}' format_url = self._download_json( source_url, lesson_id, - 'Downloading %s source JSON' % format_id, query={ + f'Downloading {format_id} source JSON', query={ 'f': ext, 'r': resolution['height'], }, headers={ @@ -163,15 +160,15 @@ class FrontendMastersIE(FrontendMastersBaseIE): subtitles = { 'en': [{ - 'url': '%s/transcripts/%s.vtt' % (self._API_BASE, lesson_id), - }] + 'url': f'{self._API_BASE}/transcripts/{lesson_id}.vtt', + }], } return { 'id': lesson_id, 'title': lesson_id, 'formats': formats, - 'subtitles': subtitles + 'subtitles': subtitles, } diff --git a/yt_dlp/extractor/fujitv.py 
b/yt_dlp/extractor/fujitv.py index 77e826e..a2d1a82 100644 --- a/yt_dlp/extractor/fujitv.py +++ b/yt_dlp/extractor/fujitv.py @@ -34,7 +34,7 @@ class FujiTVFODPlus7IE(InfoExtractor): 'series': 'ちびまる子ちゃん', 'series_id': '5d40', 'thumbnail': 'https://i.fod.fujitv.co.jp/img/program/5d40/episode/5d40810083_a.jpg'}, - 'skip': 'Video available only in one week' + 'skip': 'Video available only in one week', }] def _real_extract(self, url): @@ -43,7 +43,9 @@ class FujiTVFODPlus7IE(InfoExtractor): json_info = {} token = self._get_cookies(url).get('CT') if token: - json_info = self._download_json('https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id=%s&is_premium=false' % video_id, video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False) + json_info = self._download_json( + f'https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id={video_id}&is_premium=false', + video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False) else: self.report_warning(f'The token cookie is needed to extract video metadata. 
{self._login_hint("cookies")}') formats, subtitles = [], {} @@ -67,5 +69,5 @@ class FujiTVFODPlus7IE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, 'thumbnail': f'{self._BASE_URL}img/program/{series_id}/episode/{video_id}_a.jpg', - '_format_sort_fields': ('tbr', ) + '_format_sort_fields': ('tbr', ), } diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index c32f005..d3e61c8 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -96,7 +96,7 @@ class FunimationPageIE(FunimationBaseIE): f'{show}_{episode}', query={ 'deviceType': 'web', 'region': self._REGION, - 'locale': locale or 'en' + 'locale': locale or 'en', }), ('videoList', ..., 'id'), get_all=False) return self.url_result(f'https://www.funimation.com/player/{video_id}', FunimationIE.ie_key(), video_id) @@ -157,7 +157,7 @@ class FunimationIE(FunimationBaseIE): yield lang, version.title(), f def _get_episode(self, webpage, experience_id=None, episode_id=None, fatal=True): - ''' Extract the episode, season and show objects given either episode/experience id ''' + """ Extract the episode, season and show objects given either episode/experience id """ show = self._parse_json( self._search_regex( r'show\s*=\s*({.+?})\s*;', webpage, 'show data', fatal=fatal), @@ -199,16 +199,16 @@ class FunimationIE(FunimationBaseIE): continue thumbnails.append({'url': fmt.get('poster')}) duration = max(duration, fmt.get('duration', 0)) - format_name = '%s %s (%s)' % (version, lang, experience_id) + format_name = f'{version} {lang} ({experience_id})' self.extract_subtitles( subtitles, experience_id, display_id=display_id, format_name=format_name, episode=episode if experience_id == initial_experience_id else episode_id) headers = {} if self._TOKEN: - headers['Authorization'] = 'Token %s' % self._TOKEN + headers['Authorization'] = f'Token {self._TOKEN}' page = self._download_json( - 'https://www.funimation.com/api/showexperience/%s/' % experience_id, + 
f'https://www.funimation.com/api/showexperience/{experience_id}/', display_id, headers=headers, expected_status=403, query={ 'pinst_id': ''.join(random.choices(string.digits + string.ascii_letters, k=8)), }, note=f'Downloading {format_name} JSON') @@ -216,7 +216,7 @@ class FunimationIE(FunimationBaseIE): if not sources: error = try_get(page, lambda x: x['errors'][0], dict) if error: - self.report_warning('%s said: Error %s - %s' % ( + self.report_warning('{} said: Error {} - {}'.format( self.IE_NAME, error.get('code'), error.get('detail') or error.get('title'))) else: self.report_warning('No sources found for format') @@ -227,11 +227,11 @@ class FunimationIE(FunimationBaseIE): source_type = source.get('videoType') or determine_ext(source_url) if source_type == 'm3u8': current_formats.extend(self._extract_m3u8_formats( - source_url, display_id, 'mp4', m3u8_id='%s-%s' % (experience_id, 'hls'), fatal=False, + source_url, display_id, 'mp4', m3u8_id='{}-{}'.format(experience_id, 'hls'), fatal=False, note=f'Downloading {format_name} m3u8 information')) else: current_formats.append({ - 'format_id': '%s-%s' % (experience_id, source_type), + 'format_id': f'{experience_id}-{source_type}', 'url': source_url, }) for f in current_formats: @@ -284,7 +284,7 @@ class FunimationIE(FunimationBaseIE): sub_type = sub_type if sub_type != 'FULL' else None current_sub = { 'url': text_track['src'], - 'name': join_nonempty(version, text_track.get('label'), sub_type, delim=' ') + 'name': join_nonempty(version, text_track.get('label'), sub_type, delim=' '), } lang = join_nonempty(text_track.get('language', 'und'), version if version != 'Simulcast' else None, @@ -302,7 +302,7 @@ class FunimationShowIE(FunimationBaseIE): 'url': 'https://www.funimation.com/en/shows/sk8-the-infinity', 'info_dict': { 'id': '1315000', - 'title': 'SK8 the Infinity' + 'title': 'SK8 the Infinity', }, 'playlist_count': 13, 'params': { @@ -313,7 +313,7 @@ class FunimationShowIE(FunimationBaseIE): 'url': 
'https://www.funimation.com/shows/ouran-high-school-host-club/', 'info_dict': { 'id': '39643', - 'title': 'Ouran High School Host Club' + 'title': 'Ouran High School Host Club', }, 'playlist_count': 26, 'params': { @@ -329,11 +329,11 @@ class FunimationShowIE(FunimationBaseIE): base_url, locale, display_id = self._match_valid_url(url).groups() show_info = self._download_json( - 'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=%s&deviceType=web&locale=%s' - % (display_id, self._REGION, locale or 'en'), display_id) + 'https://title-api.prd.funimationsvc.com/v2/shows/{}?region={}&deviceType=web&locale={}'.format( + display_id, self._REGION, locale or 'en'), display_id) items_info = self._download_json( - 'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s' - % show_info.get('id'), display_id) + 'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id={}'.format( + show_info.get('id')), display_id) vod_items = traverse_obj(items_info, ('items', ..., lambda k, _: re.match(r'(?i)mostRecent[AS]vod', k), 'item')) @@ -343,7 +343,7 @@ class FunimationShowIE(FunimationBaseIE): 'title': show_info['name'], 'entries': orderedSet( self.url_result( - '%s/%s' % (base_url, vod_item.get('episodeSlug')), FunimationPageIE.ie_key(), + '{}/{}'.format(base_url, vod_item.get('episodeSlug')), FunimationPageIE.ie_key(), vod_item.get('episodeId'), vod_item.get('episodeName')) for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder', -1))), } diff --git a/yt_dlp/extractor/funker530.py b/yt_dlp/extractor/funker530.py index 62fd7f6..5d59e9c 100644 --- a/yt_dlp/extractor/funker530.py +++ b/yt_dlp/extractor/funker530.py @@ -25,7 +25,7 @@ class Funker530IE(InfoExtractor): 'timestamp': 1686241321, 'live_status': 'not_live', 'description': 'md5:bea2e1f458095414e04b5ac189c2f980', - } + }, }, { 'url': 'https://funker530.com/video/my-friends-joined-the-russians-civdiv/', 'md5': 
'a42c2933391210662e93e867d7124b70', @@ -54,7 +54,7 @@ class Funker530IE(InfoExtractor): 'upload_date': '20230608', 'playable_in_embed': True, 'heatmap': 'count:100', - } + }, }] def _real_extract(self, url): @@ -76,5 +76,5 @@ class Funker530IE(InfoExtractor): '_type': 'url_transparent', 'description': strip_or_none(self._search_regex( r'(?s)(.+)About the Author', clean_html(get_element_by_class('video-desc-paragraph', webpage)), - 'description', default=None)) + 'description', default=None)), } diff --git a/yt_dlp/extractor/fuyintv.py b/yt_dlp/extractor/fuyintv.py index 197901d..f46839b 100644 --- a/yt_dlp/extractor/fuyintv.py +++ b/yt_dlp/extractor/fuyintv.py @@ -11,7 +11,7 @@ class FuyinTVIE(InfoExtractor): 'ext': 'mp4', 'title': '第1集', 'description': 'md5:21a3d238dc8d49608e1308e85044b9c3', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py index c10d290..024628e 100644 --- a/yt_dlp/extractor/gab.py +++ b/yt_dlp/extractor/gab.py @@ -22,29 +22,29 @@ class GabTVIE(InfoExtractor): 'uploader': 'Wurzelroot', 'uploader_id': '608fb0a85738fd1974984f7d', 'thumbnail': 'https://tv.gab.com/image/61217eacea5665de450d0488', - } + }, }] def _real_extract(self, url): - id = self._match_id(url).split('-')[-1] - webpage = self._download_webpage(url, id) + video_id = self._match_id(url).split('-')[-1] + webpage = self._download_webpage(url, video_id) channel_id = self._search_regex(r'data-channel-id=\"(?P<channel_id>[^\"]+)', webpage, 'channel_id') channel_name = self._search_regex(r'data-channel-name=\"(?P<channel_id>[^\"]+)', webpage, 'channel_name') title = self._search_regex(r'data-episode-title=\"(?P<channel_id>[^\"]+)', webpage, 'title') view_key = self._search_regex(r'data-view-key=\"(?P<channel_id>[^\"]+)', webpage, 'view_key') description = clean_html( self._html_search_regex(self._meta_regex('description'), webpage, 'description', group='content')) or None - available_resolutions = re.findall(r'<a\ 
data-episode-id=\"%s\"\ data-resolution=\"(?P<resolution>[^\"]+)' % id, - webpage) + available_resolutions = re.findall( + rf'<a\ data-episode-id=\"{video_id}\"\ data-resolution=\"(?P<resolution>[^\"]+)', webpage) formats = [] for resolution in available_resolutions: frmt = { - 'url': f'https://tv.gab.com/media/{id}?viewKey={view_key}&r={resolution}', + 'url': f'https://tv.gab.com/media/{video_id}?viewKey={view_key}&r={resolution}', 'format_id': resolution, 'vcodec': 'h264', 'acodec': 'aac', - 'ext': 'mp4' + 'ext': 'mp4', } if 'audio-' in resolution: frmt['abr'] = str_to_int(resolution.replace('audio-', '')) @@ -55,13 +55,13 @@ class GabTVIE(InfoExtractor): formats.append(frmt) return { - 'id': id, + 'id': video_id, 'title': title, 'formats': formats, 'description': description, 'uploader': channel_name, 'uploader_id': channel_id, - 'thumbnail': f'https://tv.gab.com/image/{id}', + 'thumbnail': f'https://tv.gab.com/image/{video_id}', } @@ -79,7 +79,7 @@ class GabIE(InfoExtractor): 'description': 'md5:204055fafd5e1a519f5d6db953567ca3', 'timestamp': 1635192289, 'upload_date': '20211025', - } + }, }, { 'url': 'https://gab.com/TheLonelyProud/posts/107045884469287653', 'md5': 'f9cefcfdff6418e392611a828d47839d', @@ -91,7 +91,7 @@ class GabIE(InfoExtractor): 'timestamp': 1633390571, 'upload_date': '20211004', 'uploader': 'TheLonelyProud', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/gaia.py b/yt_dlp/extractor/gaia.py index c84386f..048ea51 100644 --- a/yt_dlp/extractor/gaia.py +++ b/yt_dlp/extractor/gaia.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, -) from ..utils import ( ExtractorError, int_or_none, @@ -52,7 +50,7 @@ class GaiaIE(InfoExtractor): def _real_initialize(self): auth = self._get_cookies('https://www.gaia.com/').get('auth') if auth: - auth = self._parse_json(compat_urllib_parse_unquote(auth.value), None, fatal=False) + auth = 
self._parse_json(urllib.parse.unquote(auth.value), None, fatal=False) self._jwt = auth.get('jwt') def _perform_login(self, username, password): @@ -62,7 +60,7 @@ class GaiaIE(InfoExtractor): 'https://auth.gaia.com/v1/login', None, data=urlencode_postdata({ 'username': username, - 'password': password + 'password': password, })) if auth.get('success') is False: raise ExtractorError(', '.join(auth['messages']), expected=True) @@ -77,7 +75,7 @@ class GaiaIE(InfoExtractor): node = self._download_json( 'https://brooklyn.gaia.com/node/%d' % node_id, node_id) vdata = node[vtype] - media_id = compat_str(vdata['nid']) + media_id = str(vdata['nid']) title = node['title'] headers = None @@ -115,7 +113,7 @@ class GaiaIE(InfoExtractor): 'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])), 'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])), 'comment_count': int_or_none(node.get('comment_count')), - 'series': try_get(node, lambda x: x['series']['title'], compat_str), + 'series': try_get(node, lambda x: x['series']['title'], str), 'season_number': int_or_none(get_field_value('season')), 'season_id': str_or_none(get_field_value('series_nid', 'nid')), 'episode_number': int_or_none(get_field_value('episode')), diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py index b284e1e..01386c1 100644 --- a/yt_dlp/extractor/gamejolt.py +++ b/yt_dlp/extractor/gamejolt.py @@ -1,9 +1,9 @@ import itertools import json import math +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote from ..utils import ( determine_ext, format_field, @@ -45,7 +45,7 @@ class GameJoltBaseIE(InfoExtractor): 'comments/Fireside_Post/%s/%s?%s=%d' % ( post_num_id, sort_by, 'scroll_id' if is_scrolled else 'page', scroll_id if is_scrolled else page), - post_hash_id, note='Downloading comments list page %d' % page) + post_hash_id, note=f'Downloading comments list page {page}') if not 
comments_data.get('comments'): break for comment in traverse_obj(comments_data, (('comments', 'childComments'), ...), expected_type=dict): @@ -87,7 +87,7 @@ class GameJoltBaseIE(InfoExtractor): 'uploader': user_data.get('display_name') or user_data.get('name'), 'uploader_id': user_data.get('username'), 'uploader_url': format_field(user_data, 'url', 'https://gamejolt.com%s'), - 'categories': [try_get(category, lambda x: '%s - %s' % (x['community']['name'], x['channel'].get('display_title') or x['channel']['title'])) + 'categories': [try_get(category, lambda x: '{} - {}'.format(x['community']['name'], x['channel'].get('display_title') or x['channel']['title'])) for category in post_data.get('communities') or []], 'tags': traverse_obj( lead_content, ('content', ..., 'content', ..., 'marks', ..., 'attrs', 'tag'), expected_type=str_or_none), @@ -95,7 +95,7 @@ class GameJoltBaseIE(InfoExtractor): 'comment_count': int_or_none(post_data.get('comment_count'), default=0), 'timestamp': int_or_none(post_data.get('added_on'), scale=1000), 'release_timestamp': int_or_none(post_data.get('published_on'), scale=1000), - '__post_extractor': self.extract_comments(post_data.get('id'), post_id) + '__post_extractor': self.extract_comments(post_data.get('id'), post_id), } # TODO: Handle multiple videos/embeds? 
@@ -152,7 +152,7 @@ class GameJoltBaseIE(InfoExtractor): 'height': media.get('height') if url_key == 'img_url' else None, 'filesize': media.get('filesize') if url_key == 'img_url' else None, 'acodec': 'none', - } for url_key in ('img_url', 'mediaserver_url', 'mediaserver_url_mp4', 'mediaserver_url_webm') if media.get(url_key)] + } for url_key in ('img_url', 'mediaserver_url', 'mediaserver_url_mp4', 'mediaserver_url_webm') if media.get(url_key)], }) if gif_entries: return { @@ -192,7 +192,7 @@ class GameJoltIE(GameJoltBaseIE): 'like_count': int, 'comment_count': int, 'view_count': int, - } + }, }, { # YouTube embed 'url': 'https://gamejolt.com/p/hey-hey-if-there-s-anyone-who-s-looking-to-get-into-learning-a-n6g4jzpq', @@ -220,7 +220,7 @@ class GameJoltIE(GameJoltBaseIE): 'upload_date': '20211015', 'view_count': int, 'chapters': 'count:18', - } + }, }, { # Article 'url': 'https://gamejolt.com/p/i-fuckin-broke-chaos-d56h3eue', @@ -243,7 +243,7 @@ class GameJoltIE(GameJoltBaseIE): 'like_count': int, 'comment_count': int, 'view_count': int, - } + }, }, { # Single GIF 'url': 'https://gamejolt.com/p/hello-everyone-i-m-developing-a-pixel-art-style-mod-for-fnf-and-i-vs4gdrd8', @@ -267,7 +267,7 @@ class GameJoltIE(GameJoltBaseIE): 'id': 'dszyjnwi', 'ext': 'webm', 'title': 'gif-presentacion-mejorado-dszyjnwi', - } + }, }], 'playlist_count': 1, }, { @@ -310,7 +310,7 @@ class GameJoltPostListBaseIE(GameJoltBaseIE): endpoint, list_id, note=f'{note} page {page_num}', errnote=errnote, data=json.dumps({ 'scrollDirection': 'from', 'scrollId': scroll_id, - }).encode('utf-8')).get('items') + }).encode()).get('items') class GameJoltUserIE(GameJoltPostListBaseIE): @@ -348,7 +348,7 @@ class GameJoltGameIE(GameJoltPostListBaseIE): 'info_dict': { 'id': '655124', 'title': 'Friday Night Funkin\': Friday 4 Fun', - 'description': 'md5:576a7dd87912a2dcf33c50d2bd3966d3' + 'description': 'md5:576a7dd87912a2dcf33c50d2bd3966d3', }, 'params': { 'ignore_no_formats_error': True, @@ -383,7 +383,7 @@ 
class GameJoltGameSoundtrackIE(GameJoltBaseIE): 'url': r're:^https://.+vs-oswald-menu-music\.mp3$', 'release_timestamp': 1635190816, 'release_date': '20211025', - } + }, }, { 'info_dict': { 'id': '184435', @@ -392,7 +392,7 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE): 'url': r're:^https://.+rabbit-s-luck--full-version-\.mp3$', 'release_timestamp': 1635190841, 'release_date': '20211025', - } + }, }, { 'info_dict': { 'id': '185228', @@ -401,7 +401,7 @@ class GameJoltGameSoundtrackIE(GameJoltBaseIE): 'url': r're:^https://.+last-straw\.mp3$', 'release_timestamp': 1635881104, 'release_date': '20211102', - } + }, }], 'playlist_count': 3, }] @@ -427,7 +427,7 @@ class GameJoltCommunityIE(GameJoltPostListBaseIE): 'info_dict': { 'id': 'fnf/videos', 'title': 'Friday Night Funkin\' - Videos', - 'description': 'md5:6d8c06f27460f7d35c1554757ffe53c8' + 'description': 'md5:6d8c06f27460f7d35c1554757ffe53c8', }, 'params': { 'playlistend': 50, @@ -440,7 +440,7 @@ class GameJoltCommunityIE(GameJoltPostListBaseIE): 'info_dict': { 'id': 'youtubers/featured', 'title': 'Youtubers - featured', - 'description': 'md5:53e5582c93dcc467ab597bfca4db17d4' + 'description': 'md5:53e5582c93dcc467ab597bfca4db17d4', }, 'params': { 'playlistend': 50, @@ -528,7 +528,7 @@ class GameJoltSearchIE(GameJoltPostListBaseIE): def _real_extract(self, url): filter_mode, query = self._match_valid_url(url).group('filter', 'id') - display_query = compat_urllib_parse_unquote(query) + display_query = urllib.parse.unquote(query) return self.playlist_result( self._search_entries(query, filter_mode, display_query) if filter_mode else self._entries( f'web/posts/fetch/search/{query}', display_query, initial_items=self._call_api( diff --git a/yt_dlp/extractor/gamespot.py b/yt_dlp/extractor/gamespot.py index 8dec252..cd3f965 100644 --- a/yt_dlp/extractor/gamespot.py +++ b/yt_dlp/extractor/gamespot.py @@ -1,5 +1,6 @@ +import urllib.parse + from .once import OnceIE -from ..compat import compat_urllib_parse_unquote class 
GameSpotIE(OnceIE): @@ -40,7 +41,7 @@ class GameSpotIE(OnceIE): data_video = self._parse_json(self._html_search_regex( r'data-video=(["\'])({.*?})\1', webpage, 'video data', group=2), page_id) - title = compat_urllib_parse_unquote(data_video['title']) + title = urllib.parse.unquote(data_video['title']) streams = data_video['videoStreams'] formats = [] diff --git a/yt_dlp/extractor/gamestar.py b/yt_dlp/extractor/gamestar.py index e9966f5..8e3b8a5 100644 --- a/yt_dlp/extractor/gamestar.py +++ b/yt_dlp/extractor/gamestar.py @@ -19,7 +19,7 @@ class GameStarIE(InfoExtractor): 'timestamp': 1406542380, 'upload_date': '20140728', 'duration': 17, - } + }, }, { 'url': 'http://www.gamepro.de/videos/top-10-indie-spiele-fuer-nintendo-switch-video-tolle-nindies-games-zum-download,95316.html', 'only_matching': True, @@ -42,7 +42,7 @@ class GameStarIE(InfoExtractor): webpage, 'JSON-LD', group='json_ld'), video_id) info_dict = self._json_ld(json_ld, video_id) info_dict['title'] = remove_end( - info_dict['title'], ' - Game%s' % site.title()) + info_dict['title'], f' - Game{site.title()}') view_count = int_or_none(json_ld.get('interactionCount')) comment_count = int_or_none(self._html_search_regex( @@ -54,7 +54,7 @@ class GameStarIE(InfoExtractor): 'url': 'http://gamestar.de/_misc/videos/portal/getVideoUrl.cfm?premium=0&videoId=' + video_id, 'ext': 'mp4', 'view_count': view_count, - 'comment_count': comment_count + 'comment_count': comment_count, }) return info_dict diff --git a/yt_dlp/extractor/gaskrank.py b/yt_dlp/extractor/gaskrank.py index 6403be8..beb5a8a 100644 --- a/yt_dlp/extractor/gaskrank.py +++ b/yt_dlp/extractor/gaskrank.py @@ -22,7 +22,7 @@ class GaskrankIE(InfoExtractor): 'display_id': 'strike-einparken-durch-anfaenger-crash-mit-groesserem-flurschaden', 'uploader_id': 'Bikefun', 'upload_date': '20170110', - } + }, }, { 'url': 'http://www.gaskrank.tv/tv/racing/isle-of-man-tt-2011-michael-du-15920.htm', 'md5': 'c33ee32c711bc6c8224bfcbe62b23095', @@ -36,7 +36,7 @@ class 
GaskrankIE(InfoExtractor): 'uploader_id': 'IOM', 'upload_date': '20170523', 'uploader_url': 'www.iomtt.com', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/gazeta.py b/yt_dlp/extractor/gazeta.py index 8925b69..b1b6ee7 100644 --- a/yt_dlp/extractor/gazeta.py +++ b/yt_dlp/extractor/gazeta.py @@ -33,7 +33,7 @@ class GazetaIE(InfoExtractor): mobj = self._match_valid_url(url) display_id = mobj.group('id') - embed_url = '%s?p=embed' % mobj.group('url') + embed_url = '{}?p=embed'.format(mobj.group('url')) embed_page = self._download_webpage( embed_url, display_id, 'Downloading embed page') @@ -41,4 +41,4 @@ class GazetaIE(InfoExtractor): r'<div[^>]*?class="eagleplayer"[^>]*?data-id="([^"]+)"', embed_page, 'video id') return self.url_result( - 'eagleplatform:gazeta.media.eagleplatform.com:%s' % video_id, 'EaglePlatform') + f'eagleplatform:gazeta.media.eagleplatform.com:{video_id}', 'EaglePlatform') diff --git a/yt_dlp/extractor/gbnews.py b/yt_dlp/extractor/gbnews.py index bb1554e..d652566 100644 --- a/yt_dlp/extractor/gbnews.py +++ b/yt_dlp/extractor/gbnews.py @@ -1,5 +1,3 @@ -import functools - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -35,7 +33,7 @@ class GBNewsIE(InfoExtractor): 'display_id': 'prince-harry-in-love-with-kate-meghan-markle-jealous-royal', 'description': 'Ingrid Seward has published 17 books documenting the highs and lows of the Royal Family', 'title': 'Royal author claims Prince Harry was \'in love\' with Kate - Meghan was \'jealous\'', - } + }, }, { 'url': 'https://www.gbnews.uk/watchlive', 'info_dict': { @@ -48,13 +46,20 @@ class GBNewsIE(InfoExtractor): }, 'params': {'skip_download': 'm3u8'}, }] + _SS_ENDPOINTS = None - @functools.lru_cache def _get_ss_endpoint(self, data_id, data_env): + if not self._SS_ENDPOINTS: + self._SS_ENDPOINTS = {} + if not data_id: data_id = 'GB003' if not data_env: data_env = 'production' + key = data_id, data_env + result = self._SS_ENDPOINTS.get(key) + if result: + 
return result json_data = self._download_json( self._SSMP_URL, None, 'Downloading Simplestream JSON metadata', query={ @@ -65,6 +70,7 @@ class GBNewsIE(InfoExtractor): if not meta_url: raise ExtractorError('No API host found') + self._SS_ENDPOINTS[key] = meta_url return meta_url def _real_extract(self, url): diff --git a/yt_dlp/extractor/gdcvault.py b/yt_dlp/extractor/gdcvault.py index b4d81b2..5d45240 100644 --- a/yt_dlp/extractor/gdcvault.py +++ b/yt_dlp/extractor/gdcvault.py @@ -18,8 +18,8 @@ class GDCVaultIE(InfoExtractor): 'id': '201311826596_AWNY', 'display_id': 'Doki-Doki-Universe-Sweet-Simple', 'ext': 'mp4', - 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)' - } + 'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)', + }, }, { 'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of', @@ -27,11 +27,11 @@ class GDCVaultIE(InfoExtractor): 'id': '201203272_1330951438328RSXR', 'display_id': 'Embracing-the-Dark-Art-of', 'ext': 'flv', - 'title': 'Embracing the Dark Art of Mathematical Modeling in AI' + 'title': 'Embracing the Dark Art of Mathematical Modeling in AI', }, 'params': { 'skip_download': True, # Requires rtmpdump - } + }, }, { 'url': 'http://www.gdcvault.com/play/1015301/Thexder-Meets-Windows-95-or', @@ -202,7 +202,7 @@ class GDCVaultIE(InfoExtractor): 'display_id': display_id, }) return info - embed_url = '%s/xml/%s' % (xml_root, xml_name) + embed_url = f'{xml_root}/xml/{xml_name}' ie_key = 'DigitallySpeaking' return { diff --git a/yt_dlp/extractor/gedidigital.py b/yt_dlp/extractor/gedidigital.py index 1878d63..2ffa660 100644 --- a/yt_dlp/extractor/gedidigital.py +++ b/yt_dlp/extractor/gedidigital.py @@ -109,7 +109,7 @@ class GediDigitalIE(InfoExtractor): # add protocol if missing for i, e in enumerate(urls): if e.startswith('//'): - urls[i] = 'https:%s' % e + urls[i] = f'https:{e}' # clean iframes urls for i, e in enumerate(urls): urls[i] = urljoin(base_url(e), url_basename(e)) @@ -166,7 +166,7 @@ 
class GediDigitalIE(InfoExtractor): 'abr': abr, 'tbr': abr, 'acodec': ext, - 'vcodec': 'none' + 'vcodec': 'none', }) else: mobj = re.match(r'^video-rrtv-(\d+)(?:-(\d+))?$', n) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2818c71..3b8e1e9 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -62,7 +62,7 @@ class GenericIE(InfoExtractor): 'upload_date': '20100513', 'direct': True, 'timestamp': 1273772943.0, - } + }, }, # Direct link to media delivered compressed (until Accept-Encoding is *) { @@ -75,7 +75,7 @@ class GenericIE(InfoExtractor): 'upload_date': '20140522', }, 'expected_warnings': [ - 'URL could be a direct video link, returning it as such.' + 'URL could be a direct video link, returning it as such.', ], 'skip': 'URL invalid', }, @@ -109,8 +109,8 @@ class GenericIE(InfoExtractor): 'timestamp': 1416498816.0, }, 'expected_warnings': [ - 'URL could be a direct video link, returning it as such.' - ] + 'URL could be a direct video link, returning it as such.', + ], }, # RSS feed { @@ -118,7 +118,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'https://phihag.de/2014/youtube-dl/rss2.xml', 'title': 'Zero Punctuation', - 'description': 're:.*groundbreaking video review series.*' + 'description': 're:.*groundbreaking video review series.*', }, 'playlist_mincount': 11, }, @@ -334,7 +334,7 @@ class GenericIE(InfoExtractor): }, 'params': { 'skip_download': False, - } + }, }, { # redirect in Refresh HTTP header @@ -360,7 +360,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'uploader': 'www.hodiho.fr', 'title': 'R\u00e9gis plante sa Jeep', - } + }, }, # bandcamp page with custom domain { @@ -438,19 +438,19 @@ class GenericIE(InfoExtractor): 'id': '370908', 'title': 'Госзаказ. День 3', 'ext': 'mp4', - } + }, }, { 'info_dict': { 'id': '370905', 'title': 'Госзаказ. День 2', 'ext': 'mp4', - } + }, }, { 'info_dict': { 'id': '370902', 'title': 'Госзаказ. 
День 1', 'ext': 'mp4', - } + }, }], 'params': { # m3u8 download @@ -491,7 +491,7 @@ class GenericIE(InfoExtractor): 'title': 'Hidden miracles of the natural world', 'uploader': 'Louie Schwartzberg', 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9', - } + }, }, # nowvideo embed hidden behind percent encoding { @@ -516,7 +516,7 @@ class GenericIE(InfoExtractor): 'upload_date': '20140320', }, 'params': { - 'skip_download': 'Requires rtmpdump' + 'skip_download': 'Requires rtmpdump', }, 'skip': 'video gone', }, @@ -537,8 +537,8 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, 'expected_warnings': [ - 'Forbidden' - ] + 'Forbidden', + ], }, # Condé Nast embed { @@ -548,7 +548,7 @@ class GenericIE(InfoExtractor): 'id': '53501be369702d3275860000', 'ext': 'mp4', 'title': 'Honda’s New Asimo Robot Is More Human Than Ever', - } + }, }, # Dailymotion embed { @@ -595,7 +595,7 @@ class GenericIE(InfoExtractor): 'add_ie': ['Youtube'], 'params': { 'skip_download': True, - } + }, }, # MTVServices embed { @@ -624,7 +624,7 @@ class GenericIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, # Flowplayer { @@ -636,7 +636,7 @@ class GenericIE(InfoExtractor): 'age_limit': 18, 'uploader': 'www.handjobhub.com', 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com', - } + }, }, # MLB embed { @@ -680,7 +680,7 @@ class GenericIE(InfoExtractor): 'uploader': 'Sophos Security', 'title': 'Chet Chat 171 - Oct 29, 2014', 'upload_date': '20141029', - } + }, }, # Soundcloud multiple embeds { @@ -714,7 +714,7 @@ class GenericIE(InfoExtractor): 'ext': 'flv', 'upload_date': '20141112', 'title': 'Rosetta #CometLanding webcast HL 10', - } + }, }, # Another Livestream embed, without 'new.' 
in URL { @@ -749,7 +749,7 @@ class GenericIE(InfoExtractor): 'title': 'Underground Wellness Radio - Jack Tips: 5 Steps to Permanent Gut Healing', 'thumbnail': 'https://assets.libsyn.com/secure/item/3793998/?height=90&width=90', 'duration': 3989.0, - } + }, }, # Cinerama player { @@ -759,7 +759,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'uploader': 'www.abc.net.au', 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015', - } + }, }, # embedded viddler video { @@ -876,7 +876,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'aanslagen-kopenhagen', 'title': 'Aanslagen Kopenhagen', - } + }, }, # Zapiks embed { @@ -885,7 +885,7 @@ class GenericIE(InfoExtractor): 'id': '118046', 'ext': 'mp4', 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', - } + }, }, # Kaltura embed (different embed code) { @@ -924,11 +924,11 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], 'expected_warnings': [ - 'Could not send HEAD request' + 'Could not send HEAD request', ], 'params': { 'skip_download': True, - } + }, }, { # Kaltura embedded, some fileExt broken (#11480) @@ -1055,7 +1055,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': '8RUoRhRi', 'ext': 'mp4', - 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!", + 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', 'timestamp': 1428207000, 'upload_date': '20150405', @@ -1131,7 +1131,7 @@ class GenericIE(InfoExtractor): 'uploader': 'clickhole', 'upload_date': '20150527', 'timestamp': 1432744860, - } + }, }, # SnagFilms embed { @@ -1140,7 +1140,7 @@ class GenericIE(InfoExtractor): 'id': '74849a00-85a9-11e1-9660-123139220831', 'ext': 'mp4', 'title': '#whilewewatch', - } + }, }, # AdobeTVVideo embed { @@ -1436,7 +1436,7 @@ class GenericIE(InfoExtractor): 'upload_date': '20211217', 'thumbnail': 
'https://www.megatv.com/wp-content/uploads/2021/12/tsiodras-mitsotakis-1024x545.jpg', }, - }] + }], }, { 'url': 'https://www.ertnews.gr/video/manolis-goyalles-o-anthropos-piso-apo-ti-diadiktyaki-vasilopita/', @@ -1547,7 +1547,7 @@ class GenericIE(InfoExtractor): 'id': '0f64ce6', 'title': 'vl14062007715967', 'ext': 'mp4', - } + }, }, { 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/', @@ -1559,7 +1559,7 @@ class GenericIE(InfoExtractor): 'description': 'md5:5a51db84a62def7b7054df2ade403c6c', 'timestamp': 1474354800, 'upload_date': '20160920', - } + }, }, { 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton', @@ -1651,7 +1651,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': '83645793', 'title': 'Lock up and get excited', - 'ext': 'mp4' + 'ext': 'mp4', }, 'skip': 'TODO: fix nested playlists processing in tests', }, @@ -1727,7 +1727,7 @@ class GenericIE(InfoExtractor): 'upload_date': '20220110', 'thumbnail': 'https://opentv-static.siliconweb.com/imgHandler/1920/70bc39fa-895b-4918-a364-c39d2135fc6d.jpg', - } + }, }, { # blogger embed @@ -1897,8 +1897,8 @@ class GenericIE(InfoExtractor): 'timestamp': 1501941939.0, 'title': 'That small heart attack.', 'upload_date': '20170805', - 'uploader': 'Antw87' - } + 'uploader': 'Antw87', + }, }, { # 1080p Reddit-hosted video that will redirect and be processed by RedditIE @@ -1910,8 +1910,8 @@ class GenericIE(InfoExtractor): 'title': "The game Didn't want me to Knife that Guy I guess", 'uploader': 'paraf1ve', 'timestamp': 1636788683.0, - 'upload_date': '20211113' - } + 'upload_date': '20211113', + }, }, { # MainStreaming player @@ -1923,15 +1923,15 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'live_status': 'not_live', 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster', - 'duration': 1512 - } + 'duration': 1512, + }, }, { # Multiple gfycat iframe embeds 'url': 
'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=613422', 'info_dict': { 'title': '재이, 윤, 세은 황금 드레스를 입고 빛난다', - 'id': 'board' + 'id': 'board', }, 'playlist_count': 8, }, @@ -1940,18 +1940,18 @@ class GenericIE(InfoExtractor): 'url': 'https://www.gezip.net/bbs/board.php?bo_table=entertaine&wr_id=612199', 'info_dict': { 'title': '옳게 된 크롭 니트 스테이씨 아이사', - 'id': 'board' + 'id': 'board', }, - 'playlist_count': 6 + 'playlist_count': 6, }, { # Multiple gfycat embeds, with uppercase "IFR" in urls 'url': 'https://kkzz.kr/?vid=2295', 'info_dict': { 'title': '지방시 앰버서더 에스파 카리나 움짤', - 'id': '?vid=2295' + 'id': '?vid=2295', }, - 'playlist_count': 9 + 'playlist_count': 9, }, { # Panopto embeds @@ -1984,9 +1984,9 @@ class GenericIE(InfoExtractor): 'url': 'https://www.hs.fi/kotimaa/art-2000008762560.html', 'info_dict': { 'title': 'Koronavirus | Epidemiahuippu voi olla Suomessa ohi, mutta koronaviruksen poistamista yleisvaarallisten tautien joukosta harkitaan vasta syksyllä', - 'id': 'art-2000008762560' + 'id': 'art-2000008762560', }, - 'playlist_count': 3 + 'playlist_count': 3, }, { # Ruutu embed in hs.fi with a single video @@ -2015,7 +2015,7 @@ class GenericIE(InfoExtractor): 'thumbnail': 'https://www.filmarkivet.se/wp-content/uploads/parisdmoll2.jpg', 'timestamp': 1652833414, 'age_limit': 0, - } + }, }, { 'url': 'https://www.mollymovieclub.com/p/interstellar?s=r#details', @@ -2055,7 +2055,7 @@ class GenericIE(InfoExtractor): 'thumbnail': 'https://cdn.jwplayer.com/v2/media/YTmgRiNU/poster.jpg?width=720', 'duration': 5688.0, 'upload_date': '20210111', - } + }, }, { 'note': 'JSON LD with multiple @type', @@ -2071,7 +2071,7 @@ class GenericIE(InfoExtractor): 'upload_date': '20200411', 'age_limit': 0, 'duration': 111.0, - } + }, }, { 'note': 'JSON LD with unexpected data type', @@ -2086,7 +2086,7 @@ class GenericIE(InfoExtractor): 'thumbnail': r're:^https://media.autoweek.nl/m/.+\.jpg$', 'age_limit': 0, 'direct': True, - } + }, }, { 'note': 'server returns data in brotli 
compression by default if `accept-encoding: *` is specified.', @@ -2148,7 +2148,7 @@ class GenericIE(InfoExtractor): def report_following_redirect(self, new_url): """Report information extraction.""" - self._downloader.to_screen('[redirect] Following redirect to %s' % new_url) + self._downloader.to_screen(f'[redirect] Following redirect to {new_url}') def report_detected(self, name, num=1, note=None): if num > 1: @@ -2167,7 +2167,15 @@ class GenericIE(InfoExtractor): urllib.parse.urlparse(fragment_query).query or fragment_query or urllib.parse.urlparse(manifest_url).query or None) - hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None + key_query = self._configuration_arg('key_query', [None], casesense=True)[0] + if key_query is not None: + info['extra_param_to_key_url'] = ( + urllib.parse.urlparse(key_query).query or key_query + or urllib.parse.urlparse(manifest_url).query or None) + + def hex_or_none(value): + return value if re.fullmatch(r'(0x)?[\da-f]+', value, re.IGNORECASE) else None + info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), { 'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}), }) or None @@ -2244,29 +2252,29 @@ class GenericIE(InfoExtractor): return video_url # not obfuscated parsed = urllib.parse.urlparse(video_url[len('function/0/'):]) - license = cls._kvs_get_license_token(license_code) + license_token = cls._kvs_get_license_token(license_code) urlparts = parsed.path.split('/') HASH_LENGTH = 32 - hash = urlparts[3][:HASH_LENGTH] + hash_ = urlparts[3][:HASH_LENGTH] indices = list(range(HASH_LENGTH)) # Swap indices of hash according to the destination calculated from the license token accum = 0 for src in reversed(range(HASH_LENGTH)): - accum += license[src] + accum += license_token[src] dest = (src + accum) % HASH_LENGTH indices[src], indices[dest] = indices[dest], indices[src] - urlparts[3] = ''.join(hash[index] for index in indices) + 
urlparts[3][HASH_LENGTH:] + urlparts[3] = ''.join(hash_[index] for index in indices) + urlparts[3][HASH_LENGTH:] return urllib.parse.urlunparse(parsed._replace(path='/'.join(urlparts))) @staticmethod - def _kvs_get_license_token(license): - license = license.replace('$', '') - license_values = [int(char) for char in license] + def _kvs_get_license_token(license_code): + license_code = license_code.replace('$', '') + license_values = [int(char) for char in license_code] - modlicense = license.replace('0', '1') + modlicense = license_code.replace('0', '1') center = len(modlicense) // 2 fronthalf = int(modlicense[:center + 1]) backhalf = int(modlicense[center:]) @@ -2338,18 +2346,17 @@ class GenericIE(InfoExtractor): if default_search == 'auto_warning': if re.match(r'^(?:url|URL)$', url): raise ExtractorError( - 'Invalid URL: %r . Call yt-dlp like this: yt-dlp -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url, + f'Invalid URL: {url!r} . Call yt-dlp like this: yt-dlp -v "https://www.youtube.com/watch?v=BaW_jenozKc" ', expected=True) else: self.report_warning( - 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url) + f'Falling back to youtube search for {url} . Set --default-search "auto" to suppress this warning.') return self.url_result('ytsearch:' + url) if default_search in ('error', 'fixup_error'): raise ExtractorError( - '%r is not a valid URL. ' - 'Set --default-search "ytsearch" (or run yt-dlp "ytsearch:%s" ) to search YouTube' - % (url, url), expected=True) + f'{url!r} is not a valid URL. 
' + f'Set --default-search "ytsearch" (or run yt-dlp "ytsearch:{url}" ) to search YouTube', expected=True) else: if ':' not in default_search: default_search += ':' @@ -2387,7 +2394,7 @@ class GenericIE(InfoExtractor): info_dict = { 'id': video_id, 'title': self._generic_title(url), - 'timestamp': unified_timestamp(full_response.headers.get('Last-Modified')) + 'timestamp': unified_timestamp(full_response.headers.get('Last-Modified')), } # Check for direct link to a video @@ -2401,7 +2408,7 @@ class GenericIE(InfoExtractor): subtitles = {} if format_id.endswith('mpegurl') or ext == 'm3u8': formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers) - elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd': + elif format_id.endswith(('mpd', 'dash+xml')) or ext == 'mpd': formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers) elif format_id == 'f4m' or ext == 'f4m': formats = self._extract_f4m_formats(url, video_id, headers=headers) @@ -2410,7 +2417,7 @@ class GenericIE(InfoExtractor): 'format_id': format_id, 'url': url, 'ext': ext, - 'vcodec': 'none' if m.group('type') == 'audio' else None + 'vcodec': 'none' if m.group('type') == 'audio' else None, }] info_dict['direct'] = True info_dict.update({ @@ -2458,7 +2465,7 @@ class GenericIE(InfoExtractor): try: doc = compat_etree_fromstring(webpage) except xml.etree.ElementTree.ParseError: - doc = compat_etree_fromstring(webpage.encode('utf-8')) + doc = compat_etree_fromstring(webpage.encode()) if doc.tag == 'rss': self.report_detected('RSS feed') return self._extract_rss(url, video_id, doc) @@ -2606,7 +2613,7 @@ class GenericIE(InfoExtractor): }) # https://docs.videojs.com/player#addRemoteTextTrack # https://html.spec.whatwg.org/multipage/media.html#htmltrackelement - for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage): + for sub_match in 
re.finditer(rf'(?s){re.escape(varname)}' + r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage): sub = self._parse_json( sub_match.group(1), video_id, transform_source=js_to_json, fatal=False) or {} sub_src = str_or_none(sub.get('src')) @@ -2728,7 +2735,7 @@ class GenericIE(InfoExtractor): REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' found = re.search( r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' - r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX, + rf'(?:[a-z-]+="[^"]+"\s+)*?content="{REDIRECT_REGEX}', webpage) if not found: # Look also in Refresh HTTP header @@ -2772,7 +2779,7 @@ class GenericIE(InfoExtractor): video_id = os.path.splitext(video_id)[0] headers = { - 'referer': actual_url + 'referer': actual_url, } entry_info_dict = { @@ -2829,5 +2836,5 @@ class GenericIE(InfoExtractor): for num, e in enumerate(entries, start=1): # 'url' results don't have a title if e.get('title') is not None: - e['title'] = '%s (%d)' % (e['title'], num) + e['title'] = '{} ({})'.format(e['title'], num) return entries diff --git a/yt_dlp/extractor/genericembeds.py b/yt_dlp/extractor/genericembeds.py index 9b4f14d..359d38b 100644 --- a/yt_dlp/extractor/genericembeds.py +++ b/yt_dlp/extractor/genericembeds.py @@ -15,8 +15,8 @@ class HTML5MediaEmbedIE(InfoExtractor): 'title': 'HTML5 Media', 'description': 'md5:933b2d02ceffe7a7a0f3c8326d91cc2a', }, - 'playlist_count': 2 - } + 'playlist_count': 2, + }, ] def _extract_from_webpage(self, url, webpage): @@ -50,7 +50,7 @@ class QuotedHTMLIE(InfoExtractor): 'description': 'md5:6816e1e5a65304bd7898e4c7eb1b26f7', 'age_limit': 0, }, - 'playlist_count': 2 + 'playlist_count': 2, }, { # Generic iframe embed of TV24UAPlayerIE within data-html 'url': 'https://24tv.ua/harkivyani-zgaduyut-misto-do-viyni-shhemlive-video_n1887584', @@ -60,7 +60,7 @@ class QuotedHTMLIE(InfoExtractor): 'title': 'Харків\'яни згадують місто до війни: щемливе відео', 'thumbnail': r're:^https?://.*\.jpe?g', }, - 'params': 
{'skip_download': True} + 'params': {'skip_download': True}, }, { # YouTube embeds on Squarespace (data-html): https://github.com/ytdl-org/youtube-dl/issues/21294 'url': 'https://www.harvardballetcompany.org/past-productions', @@ -70,7 +70,7 @@ class QuotedHTMLIE(InfoExtractor): 'age_limit': 0, 'description': 'Past Productions', }, - 'playlist_mincount': 26 + 'playlist_mincount': 26, }, { # Squarespace video embed, 2019-08-28, data-html 'url': 'http://ootboxford.com', diff --git a/yt_dlp/extractor/getcourseru.py b/yt_dlp/extractor/getcourseru.py index 144321a..53b8810 100644 --- a/yt_dlp/extractor/getcourseru.py +++ b/yt_dlp/extractor/getcourseru.py @@ -17,7 +17,7 @@ class GetCourseRuPlayerIE(InfoExtractor): 'title': '190bdf93f1b29735309853a7a19e24b3', 'ext': 'mp4', 'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80', - 'duration': 1693 + 'duration': 1693, }, 'skip': 'JWT expired', }] @@ -38,7 +38,7 @@ class GetCourseRuPlayerIE(InfoExtractor): }), 'id': video_id, 'formats': formats, - 'subtitles': subtitles + 'subtitles': subtitles, } @@ -47,7 +47,7 @@ class GetCourseRuIE(InfoExtractor): _DOMAINS = [ 'academymel.online', 'marafon.mani-beauty.com', - 'on.psbook.ru' + 'on.psbook.ru', ] _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})' _VALID_URL = [ @@ -68,9 +68,9 @@ class GetCourseRuIE(InfoExtractor): 'ext': 'mp4', 'title': 'Промоуроки Академии МЕЛ', 'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80', - 'duration': 1693 + 'duration': 1693, }, - }] + }], }, { 'url': 'https://academymel.getcourse.ru/3video_1', 'info_dict': { @@ -85,9 +85,9 @@ class GetCourseRuIE(InfoExtractor): 'ext': 'mp4', 'title': 'Промоуроки Академии МЕЛ', 'thumbnail': 
'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80', - 'duration': 1693 + 'duration': 1693, }, - }] + }], }, { 'url': 'https://academymel.getcourse.ru/pl/teach/control/lesson/view?id=319141781&editMode=0', 'info_dict': { @@ -101,10 +101,10 @@ class GetCourseRuIE(InfoExtractor): 'ext': 'mp4', 'title': '1. Разминка у стены', 'thumbnail': 'https://preview-htz.vhcdn.com/preview/5a521788e7dc25b4f70c3dff6512d90e/preview.jpg?version=1703223532&host=vh-81', - 'duration': 704 + 'duration': 704, }, }], - 'skip': 'paid lesson' + 'skip': 'paid lesson', }, { 'url': 'https://manibeauty.getcourse.ru/pl/teach/control/lesson/view?id=272499894', 'info_dict': { @@ -118,10 +118,10 @@ class GetCourseRuIE(InfoExtractor): 'ext': 'mp4', 'title': 'Мотивация к тренировкам', 'thumbnail': 'https://preview-htz.vhcdn.com/preview/70ed5b9f489dd03b4aff55bfdff71a26/preview.jpg?version=1685115787&host=vh-71', - 'duration': 30 + 'duration': 30, }, }], - 'skip': 'paid lesson' + 'skip': 'paid lesson', }, { 'url': 'https://gaismasmandalas.getcourse.io/ATLAUTSEVBUT', 'only_matching': True, diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py index b9dc7c6..2a9d5e7 100644 --- a/yt_dlp/extractor/gettr.py +++ b/yt_dlp/extractor/gettr.py @@ -38,7 +38,7 @@ class GettrIE(GettrBaseIE): 'timestamp': 1632782451.058, 'duration': 58.5585, 'tags': ['hornofafrica', 'explorations'], - } + }, }, { 'url': 'https://gettr.com/post/p4iahp', 'info_dict': { @@ -53,7 +53,7 @@ class GettrIE(GettrBaseIE): 'timestamp': 1626594455.017, 'duration': 23, 'tags': 'count:12', - } + }, }, { # quote post 'url': 'https://gettr.com/post/pxn5b743a9', @@ -76,7 +76,7 @@ class GettrIE(GettrBaseIE): def _real_extract(self, url): post_id = self._match_id(url) webpage = self._download_webpage(url, post_id) - api_data = self._call_api('post/%s?incl="poststats|userinfo"' % post_id, post_id) + api_data = self._call_api(f'post/{post_id}?incl="poststats|userinfo"', 
post_id) post_data = api_data.get('data') user_data = try_get(api_data, lambda x: x['aux']['uinf'][post_data['uid']], dict) or {} @@ -106,7 +106,7 @@ class GettrIE(GettrBaseIE): or self._search_regex(r'^(.+?) on GETTR', self._og_search_title(webpage, default=''), 'uploader', fatal=False)) if uploader: - title = '%s - %s' % (uploader, title) + title = f'{uploader} - {title}' formats, subtitles = self._extract_m3u8_formats_and_subtitles( urljoin(self._MEDIA_BASE_URL, vid), post_id, 'mp4', @@ -157,7 +157,7 @@ class GettrStreamingIE(GettrBaseIE): 'title': 'Day 1: Opening Session of the Grand Jury Proceeding', 'timestamp': 1644080997.164, 'upload_date': '20220205', - } + }, }, { 'url': 'https://gettr.com/streaming/psfmeefcc1', 'info_dict': { @@ -172,12 +172,12 @@ class GettrStreamingIE(GettrBaseIE): 'duration': 21872.507, 'timestamp': 1643976662.858, 'upload_date': '20220204', - } + }, }] def _real_extract(self, url): video_id = self._match_id(url) - video_info = self._call_api('live/join/%s' % video_id, video_id, data={}) + video_info = self._call_api(f'live/join/{video_id}', video_id, data={}) live_info = video_info['broadcast'] live_url = url_or_none(live_info.get('url')) diff --git a/yt_dlp/extractor/giantbomb.py b/yt_dlp/extractor/giantbomb.py index 1125723..259d390 100644 --- a/yt_dlp/extractor/giantbomb.py +++ b/yt_dlp/extractor/giantbomb.py @@ -22,7 +22,7 @@ class GiantBombIE(InfoExtractor): 'description': 'md5:0aa3aaf2772a41b91d44c63f30dfad24', 'duration': 2399, 'thumbnail': r're:^https?://.*\.jpg$', - } + }, }, { 'url': 'https://www.giantbomb.com/shows/ben-stranding/2970-20212', 'only_matching': True, diff --git a/yt_dlp/extractor/gigya.py b/yt_dlp/extractor/gigya.py index 7baf8de..cc18ee6 100644 --- a/yt_dlp/extractor/gigya.py +++ b/yt_dlp/extractor/gigya.py @@ -15,5 +15,5 @@ class GigyaBaseIE(InfoExtractor): error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage') if error_message: raise ExtractorError( - 'Unable to login: %s' % 
error_message, expected=True) + f'Unable to login: {error_message}', expected=True) return auth_info diff --git a/yt_dlp/extractor/glide.py b/yt_dlp/extractor/glide.py index d114f34..b4c8681 100644 --- a/yt_dlp/extractor/glide.py +++ b/yt_dlp/extractor/glide.py @@ -12,7 +12,7 @@ class GlideIE(InfoExtractor): 'ext': 'mp4', 'title': "Damon's Glide message", 'thumbnail': r're:^https?://.*?\.cloudfront\.net/.*\.jpg$', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/globalplayer.py b/yt_dlp/extractor/globalplayer.py index e0c0d58..3d4a930 100644 --- a/yt_dlp/extractor/globalplayer.py +++ b/yt_dlp/extractor/globalplayer.py @@ -37,7 +37,7 @@ class GlobalPlayerBaseIE(InfoExtractor): 'url': 'streamUrl', 'timestamp': (('pubDate', 'startDate'), {unified_timestamp}), 'title': 'title', - }, get_all=False) + }, get_all=False), } @@ -111,7 +111,7 @@ class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE): 'live_status': 'is_live', 'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d', 'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=', - 'title': 're:^Classic FM Hall of Fame.+$' + 'title': 're:^Classic FM Hall of Fame.+$', }, }] diff --git a/yt_dlp/extractor/globo.py b/yt_dlp/extractor/globo.py index df98f09..d72296b 100644 --- a/yt_dlp/extractor/globo.py +++ b/yt_dlp/extractor/globo.py @@ -5,9 +5,6 @@ import random import re from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..networking import HEADRequest from ..utils import ( ExtractorError, @@ -88,7 +85,7 @@ class GloboIE(InfoExtractor): video_id, 'Getting cookies') video = self._download_json( - 'http://api.globovideos.com/videos/%s/playlist' % video_id, + f'http://api.globovideos.com/videos/{video_id}/playlist', video_id)['videos'][0] if not self.get_param('allow_unplayable_formats') and video.get('encrypted') is True: self.report_drm(video_id) @@ -97,14 +94,14 @@ class GloboIE(InfoExtractor): formats = [] security = 
self._download_json( - 'https://playback.video.globo.com/v2/video-session', video_id, 'Downloading security hash for %s' % video_id, + 'https://playback.video.globo.com/v2/video-session', video_id, f'Downloading security hash for {video_id}', headers={'content-type': 'application/json'}, data=json.dumps({ - "player_type": "desktop", - "video_id": video_id, - "quality": "max", - "content_protection": "widevine", - "vsid": "581b986b-4c40-71f0-5a58-803e579d5fa2", - "tz": "-3.0:00" + 'player_type': 'desktop', + 'video_id': video_id, + 'quality': 'max', + 'content_protection': 'widevine', + 'vsid': '581b986b-4c40-71f0-5a58-803e579d5fa2', + 'tz': '-3.0:00', }).encode()) self._request_webpage(HEADRequest(security['sources'][0]['url_template']), video_id, 'Getting locksession cookie') @@ -114,7 +111,7 @@ class GloboIE(InfoExtractor): message = security.get('message') if message: raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, message), expected=True) + f'{self.IE_NAME} returned error: {message}', expected=True) hash_code = security_hash[:2] padding = '%010d' % random.randint(1, 10000000000) @@ -128,13 +125,13 @@ class GloboIE(InfoExtractor): padding += '1' hash_prefix = '05' + security_hash[:22] - padded_sign_time = compat_str(int(received_time) + 86400) + padding + padded_sign_time = str(int(received_time) + 86400) + padding md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode() signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=') signed_hash = hash_prefix + padded_sign_time + signed_md5 source = security['sources'][0]['url_parts'] resource_url = source['scheme'] + '://' + source['domain'] + source['path'] - signed_url = '%s?h=%s&k=html5&a=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A') + signed_url = '{}?h={}&k=html5&a={}'.format(resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A') fmts, subtitles = self._extract_m3u8_formats_and_subtitles( signed_url, 
video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) @@ -230,7 +227,7 @@ class GloboArticleIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if GloboIE.suitable(url) else super(GloboArticleIE, cls).suitable(url) + return False if GloboIE.suitable(url) else super().suitable(url) def _real_extract(self, url): display_id = self._match_id(url) @@ -239,7 +236,7 @@ class GloboArticleIE(InfoExtractor): for video_regex in self._VIDEOID_REGEXES: video_ids.extend(re.findall(video_regex, webpage)) entries = [ - self.url_result('globo:%s' % video_id, GloboIE.ie_key()) + self.url_result(f'globo:{video_id}', GloboIE.ie_key()) for video_id in orderedSet(video_ids)] title = self._og_search_title(webpage).strip() description = self._html_search_meta('description', webpage) diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py index 515f3c5..35ffad5 100644 --- a/yt_dlp/extractor/glomex.py +++ b/yt_dlp/extractor/glomex.py @@ -49,15 +49,15 @@ class GlomexBaseIE(InfoExtractor): video_id_type = self._get_videoid_type(video_id) return self._download_json( self._API_URL, - video_id, 'Downloading %s JSON' % video_id_type, - 'Unable to download %s JSON' % video_id_type, + video_id, f'Downloading {video_id_type} JSON', + f'Unable to download {video_id_type} JSON', query=query) def _download_and_extract_api_data(self, video_id, integration, current_url): api_data = self._download_api_data(video_id, integration, current_url) videos = api_data['videos'] if not videos: - raise ExtractorError('no videos found for %s' % video_id) + raise ExtractorError(f'no videos found for {video_id}') videos = [self._extract_api_data(video, video_id) for video in videos] return videos[0] if len(videos) == 1 else self.playlist_result(videos, video_id) diff --git a/yt_dlp/extractor/gmanetwork.py b/yt_dlp/extractor/gmanetwork.py index 62fff4e..ecef1e1 100644 --- a/yt_dlp/extractor/gmanetwork.py +++ b/yt_dlp/extractor/gmanetwork.py @@ -33,7 +33,7 @@ class 
GMANetworkVideoIE(InfoExtractor): 'channel': 'YoüLOL', 'availability': 'public', 'release_date': '20220919', - } + }, }, { 'url': 'https://www.gmanetwork.com/fullepisodes/home/more_than_words/87059/more-than-words-full-episode-80/video?section=home', 'info_dict': { @@ -60,7 +60,7 @@ class GMANetworkVideoIE(InfoExtractor): 'tags': 'count:29', 'view_count': int, 'uploader_url': 'http://www.youtube.com/user/GMANETWORK', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py index fba98d7..83c1979 100644 --- a/yt_dlp/extractor/go.py +++ b/yt_dlp/extractor/go.py @@ -1,11 +1,11 @@ import re from .adobepass import AdobePassIE -from ..compat import compat_str from ..utils import ( ExtractorError, determine_ext, int_or_none, + join_nonempty, parse_age_limit, remove_end, remove_start, @@ -50,14 +50,14 @@ class GoIE(AdobePassIE): _VALID_URL = r'''(?x) https?:// (?P<sub_domain> - (?:%s\.)?go|fxnow\.fxnetworks| + (?:{}\.)?go|fxnow\.fxnetworks| (?:www\.)?(?:abc|freeform|disneynow) )\.com/ (?: (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)| (?:[^/]+/)*(?P<display_id>[^/?\#]+) ) - ''' % r'\.|'.join(list(_SITE_INFO.keys())) + '''.format(r'\.|'.join(list(_SITE_INFO.keys()))) _TESTS = [{ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643', 'info_dict': { @@ -94,7 +94,7 @@ class GoIE(AdobePassIE): 'series': 'Shadowhunters', 'episode_number': 1, 'timestamp': 1483387200, - 'ext': 'mp4' + 'ext': 'mp4', }, 'params': { 'geo_bypass_ip_block': '3.244.239.0/24', @@ -168,7 +168,7 @@ class GoIE(AdobePassIE): def _extract_videos(self, brand, video_id='-1', show_id='-1'): display_id = video_id if video_id != '-1' else show_id return self._download_json( - 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id), + f'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/{brand}/001/-1/{show_id}/-1/{video_id}/-1/-1.json', display_id)['video'] def 
_real_extract(self, url): @@ -191,7 +191,7 @@ class GoIE(AdobePassIE): video_id = try_get( layout, (lambda x: x['videoid'], lambda x: x['video']['id']), - compat_str) + str) if not video_id: video_id = self._search_regex( ( @@ -201,7 +201,7 @@ class GoIE(AdobePassIE): # page.analytics.videoIdCode r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)', # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet - r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)' + r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)', ), webpage, 'video id', default=video_id) if not site_info: brand = self._search_regex( @@ -266,7 +266,7 @@ class GoIE(AdobePassIE): self.raise_geo_restricted( error['message'], countries=['US']) error_message = ', '.join([error['message'] for error in errors]) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True) asset_url += '?' + entitlement['uplynkData']['sessionKey'] fmts, subs = self._extract_m3u8_formats_and_subtitles( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False) @@ -280,7 +280,7 @@ class GoIE(AdobePassIE): } if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url): f.update({ - 'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE', + 'format_id': (f'{format_id}-' if format_id else '') + 'SOURCE', 'quality': 1, }) else: @@ -288,7 +288,7 @@ class GoIE(AdobePassIE): if mobj: height = int(mobj.group(2)) f.update({ - 'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height, + 'format_id': join_nonempty(format_id, f'{height}P'), 'width': int(mobj.group(1)), 'height': height, }) diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py index 276a6c7..35cc30c 100644 --- a/yt_dlp/extractor/godresource.py +++ b/yt_dlp/extractor/godresource.py @@ -24,7 +24,7 @@ class GodResourceIE(InfoExtractor): 'channel': 'Stedfast Baptist Church', 'upload_date': '20240320', 'title': 'GodResource 
video #A01mTKjyf6w', - } + }, }, { # mp4 link 'url': 'https://new.godresource.com/video/01DXmBbQv_X', @@ -39,7 +39,7 @@ class GodResourceIE(InfoExtractor): 'channel': 'Documentaries', 'title': 'The Sodomite Deception', 'upload_date': '20230629', - } + }, }] def _real_extract(self, url): @@ -56,7 +56,7 @@ class GodResourceIE(InfoExtractor): elif ext == 'mp4': formats, subtitles = [{ 'url': video_url, - 'ext': ext + 'ext': ext, }], {} else: raise ExtractorError(f'Unexpected video format {ext}') @@ -74,6 +74,6 @@ class GodResourceIE(InfoExtractor): 'channel': ('channelName', {str}), 'channel_id': ('channelId', {str_or_none}), 'timestamp': ('streamDateCreated', {unified_timestamp}), - 'modified_timestamp': ('streamDataModified', {unified_timestamp}) - }) + 'modified_timestamp': ('streamDataModified', {unified_timestamp}), + }), } diff --git a/yt_dlp/extractor/godtube.py b/yt_dlp/extractor/godtube.py index 35fb7a9..f4496ac 100644 --- a/yt_dlp/extractor/godtube.py +++ b/yt_dlp/extractor/godtube.py @@ -30,7 +30,7 @@ class GodTubeIE(InfoExtractor): video_id = mobj.group('id') config = self._download_xml( - 'http://www.godtube.com/resource/mediaplayer/%s.xml' % video_id.lower(), + f'http://www.godtube.com/resource/mediaplayer/{video_id.lower()}.xml', video_id, 'Downloading player config XML') video_url = config.find('file').text @@ -40,7 +40,7 @@ class GodTubeIE(InfoExtractor): thumbnail = config.find('image').text media = self._download_xml( - 'http://www.godtube.com/media/xml/?v=%s' % video_id, video_id, 'Downloading media XML') + f'http://www.godtube.com/media/xml/?v={video_id}', video_id, 'Downloading media XML') title = media.find('title').text diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py index fac0884..a9777a5 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -20,8 +20,8 @@ class GofileIE(InfoExtractor): 'title': 'nuuh', 'release_timestamp': 1638338704, 'release_date': '20211201', - } - }] + }, + }], }, { 'url': 
'https://gofile.io/d/is8lKr', 'info_dict': { @@ -63,7 +63,7 @@ class GofileIE(InfoExtractor): query_params = {'wt': '4fd6sg89d7s6'} # From https://gofile.io/dist/js/alljs.js password = self.get_param('videopassword') if password: - query_params['password'] = hashlib.sha256(password.encode('utf-8')).hexdigest() + query_params['password'] = hashlib.sha256(password.encode()).hexdigest() files = self._download_json( f'https://api.gofile.io/contents/{file_id}', file_id, 'Getting filelist', query=query_params, headers={'Authorization': f'Bearer {self._TOKEN}'}) @@ -89,7 +89,7 @@ class GofileIE(InfoExtractor): 'title': file['name'].rsplit('.', 1)[0], 'url': file_url, 'filesize': file.get('size'), - 'release_timestamp': file.get('createTime') + 'release_timestamp': file.get('createTime'), } if not found_files: diff --git a/yt_dlp/extractor/golem.py b/yt_dlp/extractor/golem.py index c33d950..90d2fe6 100644 --- a/yt_dlp/extractor/golem.py +++ b/yt_dlp/extractor/golem.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..utils import ( determine_ext, ) @@ -20,7 +18,7 @@ class GolemIE(InfoExtractor): 'title': 'iPhone 6 und 6 Plus - Test', 'duration': 300.44, 'filesize': 65309548, - } + }, } _PREFIX = 'http://video.golem.de' @@ -29,7 +27,7 @@ class GolemIE(InfoExtractor): video_id = self._match_id(url) config = self._download_xml( - 'https://video.golem.de/xml/{0}.xml'.format(video_id), video_id) + f'https://video.golem.de/xml/{video_id}.xml', video_id) info = { 'id': video_id, @@ -44,8 +42,8 @@ class GolemIE(InfoExtractor): continue formats.append({ - 'format_id': compat_str(e.tag), - 'url': compat_urlparse.urljoin(self._PREFIX, url), + 'format_id': str(e.tag), + 'url': urllib.parse.urljoin(self._PREFIX, url), 'height': self._int(e.get('height'), 'height'), 'width': self._int(e.get('width'), 'width'), 'filesize': self._int(e.findtext('filesize'), 'filesize'), @@ -59,7 +57,7 @@ class 
GolemIE(InfoExtractor): if not url: continue thumbnails.append({ - 'url': compat_urlparse.urljoin(self._PREFIX, url), + 'url': urllib.parse.urljoin(self._PREFIX, url), 'width': self._int(e.get('width'), 'thumbnail width'), 'height': self._int(e.get('height'), 'thumbnail height'), }) diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py index c19192c..dfba2d3 100644 --- a/yt_dlp/extractor/googledrive.py +++ b/yt_dlp/extractor/googledrive.py @@ -1,8 +1,8 @@ import re +import urllib.parse from .common import InfoExtractor from .youtube import YoutubeIE -from ..compat import compat_parse_qs from ..utils import ( ExtractorError, bug_reports_message, @@ -39,7 +39,7 @@ class GoogleDriveIE(InfoExtractor): 'title': 'Big Buck Bunny.mp4', 'duration': 45, 'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ', - } + }, }, { # has itag 50 which is not in YoutubeIE._formats (royalty Free music from 1922) 'url': 'https://drive.google.com/uc?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x', @@ -88,7 +88,7 @@ class GoogleDriveIE(InfoExtractor): r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})', webpage) if mobj: - yield 'https://drive.google.com/file/d/%s' % mobj.group('id') + yield 'https://drive.google.com/file/d/{}'.format(mobj.group('id')) def _download_subtitles_xml(self, video_id, subtitles_id, hl): if self._captions_xml: @@ -166,7 +166,7 @@ class GoogleDriveIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - video_info = compat_parse_qs(self._download_webpage( + video_info = urllib.parse.parse_qs(self._download_webpage( 'https://drive.google.com/get_video_info', video_id, 'Downloading video webpage', query={'docid': video_id})) @@ -218,8 +218,8 @@ class GoogleDriveIE(InfoExtractor): def request_source_file(source_url, kind, data=None): return self._request_webpage( - source_url, video_id, note='Requesting %s file' % 
kind, - errnote='Unable to request %s file' % kind, fatal=False, data=data) + source_url, video_id, note=f'Requesting {kind} file', + errnote=f'Unable to request {kind} file', fatal=False, data=data) urlh = request_source_file(source_url, 'source') if urlh: def add_source_format(urlh): @@ -270,7 +270,7 @@ class GoogleDriveIE(InfoExtractor): if ttsurl: # the video Id for subtitles will be the last value in the ttsurl # query string - subtitles_id = ttsurl.encode('utf-8').decode( + subtitles_id = ttsurl.encode().decode( 'unicode_escape').split('=')[-1] self.cookiejar.clear(domain='.google.com', path='/', name='NID') @@ -294,7 +294,7 @@ class GoogleDriveFolderIE(InfoExtractor): 'url': 'https://drive.google.com/drive/folders/1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI', 'info_dict': { 'id': '1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI', - 'title': 'Forrest' + 'title': 'Forrest', }, 'playlist_count': 3, }] @@ -312,13 +312,13 @@ GET %s def _call_api(self, folder_id, key, data, **kwargs): response = self._download_webpage( 'https://clients6.google.com/batch/drive/v2beta', - folder_id, data=data.encode('utf-8'), + folder_id, data=data.encode(), headers={ 'Content-Type': 'text/plain;charset=UTF-8;', 'Origin': 'https://drive.google.com', }, query={ '$ct': f'multipart/mixed; boundary="{self._BOUNDARY}"', - 'key': key + 'key': key, }, **kwargs) return self._search_json('', response, 'api response', folder_id, **kwargs) or {} diff --git a/yt_dlp/extractor/googlepodcasts.py b/yt_dlp/extractor/googlepodcasts.py index 8b2351b..8d1cc4f 100644 --- a/yt_dlp/extractor/googlepodcasts.py +++ b/yt_dlp/extractor/googlepodcasts.py @@ -48,7 +48,7 @@ class GooglePodcastsIE(GooglePodcastsBaseIE): 'timestamp': 1609606800, 'duration': 2901, 'series': "Wait Wait... 
Don't Tell Me!", - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py index 7a98e0f..dfe5afe 100644 --- a/yt_dlp/extractor/goplay.py +++ b/yt_dlp/extractor/goplay.py @@ -31,7 +31,7 @@ class GoPlayIE(InfoExtractor): 'episode': 'Episode 2', 'episode_number': 2, }, - 'skip': 'This video is only available for registered users' + 'skip': 'This video is only available for registered users', }, { 'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay', 'info_dict': { @@ -39,7 +39,7 @@ class GoPlayIE(InfoExtractor): 'ext': 'mp4', 'title': 'A Family for the Holidays', }, - 'skip': 'This video is only available for registered users' + 'skip': 'This video is only available for registered users', }, { 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay', 'info_dict': { @@ -50,12 +50,12 @@ class GoPlayIE(InfoExtractor): 'series': 'De Mol', 'season_number': 11, 'episode_number': 1, - 'season': 'Season 11' + 'season': 'Season 11', }, 'params': { - 'skip_download': True + 'skip_download': True, }, - 'skip': 'This video is only available for registered users' + 'skip': 'This video is only available for registered users', }] _id_token = None @@ -79,7 +79,7 @@ class GoPlayIE(InfoExtractor): if movie: video_id = movie['videoUuid'] info_dict = { - 'title': movie.get('title') + 'title': movie.get('title'), } else: episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False) @@ -94,7 +94,7 @@ class GoPlayIE(InfoExtractor): api = self._download_json( f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', video_id, headers={ - 'Authorization': 'Bearer %s' % self._id_token, + 'Authorization': f'Bearer {self._id_token}', **self.geo_verification_headers(), }) @@ -154,31 +154,32 @@ class AwsIdp: self.ie = ie self.pool_id = pool_id - if "_" not in self.pool_id: - raise ValueError("Invalid pool_id format. 
Should be <region>_<poolid>.") + if '_' not in self.pool_id: + raise ValueError('Invalid pool_id format. Should be <region>_<poolid>.') self.client_id = client_id - self.region = self.pool_id.split("_")[0] - self.url = "https://cognito-idp.%s.amazonaws.com/" % (self.region,) + self.region = self.pool_id.split('_')[0] + self.url = f'https://cognito-idp.{self.region}.amazonaws.com/' # Initialize the values # https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L22 - self.n_hex = 'FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1' + \ - '29024E088A67CC74020BBEA63B139B22514A08798E3404DD' + \ - 'EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245' + \ - 'E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED' + \ - 'EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D' + \ - 'C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F' + \ - '83655D23DCA3AD961C62F356208552BB9ED529077096966D' + \ - '670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B' + \ - 'E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9' + \ - 'DE2BCBF6955817183995497CEA956AE515D2261898FA0510' + \ - '15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64' + \ - 'ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7' + \ - 'ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B' + \ - 'F12FFA06D98A0864D87602733EC86A64521F2B18177B200C' + \ - 'BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31' + \ - '43DB5BFCE0FD108E4B82D120A93AD2CAFFFFFFFFFFFFFFFF' + self.n_hex = ( + 'FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD1' + '29024E088A67CC74020BBEA63B139B22514A08798E3404DD' + 'EF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245' + 'E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7ED' + 'EE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3D' + 'C2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F' + '83655D23DCA3AD961C62F356208552BB9ED529077096966D' + '670C354E4ABC9804F1746C08CA18217C32905E462E36CE3B' + 'E39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9' + 'DE2BCBF6955817183995497CEA956AE515D2261898FA0510' + 
'15728E5A8AAAC42DAD33170D04507A33A85521ABDF1CBA64' + 'ECFB850458DBEF0A8AEA71575D060C7DB3970F85A6E1E4C7' + 'ABF5AE8CDB0933D71E8C94E04A25619DCEE3D2261AD2EE6B' + 'F12FFA06D98A0864D87602733EC86A64521F2B18177B200C' + 'BBE117577A615D6C770988C0BAD946E208E24FA074E5AB31' + '43DB5BFCE0FD108E4B82D120A93AD2CAFFFFFFFFFFFFFFFF') # https://github.com/aws/amazon-cognito-identity-js/blob/master/src/AuthenticationHelper.js#L49 self.g_hex = '2' @@ -194,26 +195,26 @@ class AwsIdp: """ Authenticate with a username and password. """ # Step 1: First initiate an authentication request auth_data_dict = self.__get_authentication_request(username) - auth_data = json.dumps(auth_data_dict).encode("utf-8") + auth_data = json.dumps(auth_data_dict).encode() auth_headers = { - "X-Amz-Target": "AWSCognitoIdentityProviderService.InitiateAuth", - "Accept-Encoding": "identity", - "Content-Type": "application/x-amz-json-1.1" + 'X-Amz-Target': 'AWSCognitoIdentityProviderService.InitiateAuth', + 'Accept-Encoding': 'identity', + 'Content-Type': 'application/x-amz-json-1.1', } auth_response_json = self.ie._download_json( self.url, None, data=auth_data, headers=auth_headers, note='Authenticating username', errnote='Invalid username') - challenge_parameters = auth_response_json.get("ChallengeParameters") + challenge_parameters = auth_response_json.get('ChallengeParameters') - if auth_response_json.get("ChallengeName") != "PASSWORD_VERIFIER": - raise AuthenticationException(auth_response_json["message"]) + if auth_response_json.get('ChallengeName') != 'PASSWORD_VERIFIER': + raise AuthenticationException(auth_response_json['message']) # Step 2: Respond to the Challenge with a valid ChallengeResponse challenge_request = self.__get_challenge_response_request(challenge_parameters, password) - challenge_data = json.dumps(challenge_request).encode("utf-8") + challenge_data = json.dumps(challenge_request).encode() challenge_headers = { - "X-Amz-Target": "AWSCognitoIdentityProviderService.RespondToAuthChallenge", - 
"Content-Type": "application/x-amz-json-1.1" + 'X-Amz-Target': 'AWSCognitoIdentityProviderService.RespondToAuthChallenge', + 'Content-Type': 'application/x-amz-json-1.1', } auth_response_json = self.ie._download_json( self.url, None, data=challenge_data, headers=challenge_headers, @@ -223,7 +224,7 @@ class AwsIdp: raise InvalidLoginException(auth_response_json['message']) return ( auth_response_json['AuthenticationResult']['IdToken'], - auth_response_json['AuthenticationResult']['RefreshToken'] + auth_response_json['AuthenticationResult']['RefreshToken'], ) def __get_authentication_request(self, username): @@ -234,15 +235,14 @@ class AwsIdp: :return: A full Authorization request. :rtype: dict """ - auth_request = { - "AuthParameters": { - "USERNAME": username, - "SRP_A": self.__long_to_hex(self.large_a_value) + return { + 'AuthParameters': { + 'USERNAME': username, + 'SRP_A': self.__long_to_hex(self.large_a_value), }, - "AuthFlow": "USER_SRP_AUTH", - "ClientId": self.client_id + 'AuthFlow': 'USER_SRP_AUTH', + 'ClientId': self.client_id, } - return auth_request def __get_challenge_response_request(self, challenge_parameters, password): """ Create a Challenge Response Request object. @@ -253,11 +253,11 @@ class AwsIdp: :return: A valid and full request data object to use as a response for a challenge. 
:rtype: dict """ - user_id = challenge_parameters["USERNAME"] - user_id_for_srp = challenge_parameters["USER_ID_FOR_SRP"] - srp_b = challenge_parameters["SRP_B"] - salt = challenge_parameters["SALT"] - secret_block = challenge_parameters["SECRET_BLOCK"] + user_id = challenge_parameters['USERNAME'] + user_id_for_srp = challenge_parameters['USER_ID_FOR_SRP'] + srp_b = challenge_parameters['SRP_B'] + salt = challenge_parameters['SALT'] + secret_block = challenge_parameters['SECRET_BLOCK'] timestamp = self.__get_current_timestamp() @@ -266,7 +266,7 @@ class AwsIdp: user_id_for_srp, password, self.__hex_to_long(srp_b), - salt + salt, ) secret_block_bytes = base64.standard_b64decode(secret_block) @@ -278,17 +278,16 @@ class AwsIdp: bytearray(timestamp, 'utf-8') hmac_obj = hmac.new(hkdf, msg, digestmod=hashlib.sha256) signature_string = base64.standard_b64encode(hmac_obj.digest()).decode('utf-8') - challenge_request = { - "ChallengeResponses": { - "USERNAME": user_id, - "TIMESTAMP": timestamp, - "PASSWORD_CLAIM_SECRET_BLOCK": secret_block, - "PASSWORD_CLAIM_SIGNATURE": signature_string + return { + 'ChallengeResponses': { + 'USERNAME': user_id, + 'TIMESTAMP': timestamp, + 'PASSWORD_CLAIM_SECRET_BLOCK': secret_block, + 'PASSWORD_CLAIM_SIGNATURE': signature_string, }, - "ChallengeName": "PASSWORD_VERIFIER", - "ClientId": self.client_id + 'ChallengeName': 'PASSWORD_VERIFIER', + 'ClientId': self.client_id, } - return challenge_request def __get_hkdf_key_for_password(self, username, password, server_b_value, salt): """ Calculates the final hkdf based on computed S value, and computed U value and the key. 
@@ -305,18 +304,17 @@ class AwsIdp: u_value = self.__calculate_u(self.large_a_value, server_b_value) if u_value == 0: raise ValueError('U cannot be zero.') - username_password = '%s%s:%s' % (self.pool_id.split('_')[1], username, password) - username_password_hash = self.__hash_sha256(username_password.encode('utf-8')) + username_password = '{}{}:{}'.format(self.pool_id.split('_')[1], username, password) + username_password_hash = self.__hash_sha256(username_password.encode()) x_value = self.__hex_to_long(self.__hex_hash(self.__pad_hex(salt) + username_password_hash)) g_mod_pow_xn = pow(self.g, x_value, self.big_n) int_value2 = server_b_value - self.k * g_mod_pow_xn s_value = pow(int_value2, self.small_a_value + u_value * x_value, self.big_n) - hkdf = self.__compute_hkdf( + return self.__compute_hkdf( bytearray.fromhex(self.__pad_hex(s_value)), - bytearray.fromhex(self.__pad_hex(self.__long_to_hex(u_value))) + bytearray.fromhex(self.__pad_hex(self.__long_to_hex(u_value))), ) - return hkdf def __compute_hkdf(self, ikm, salt): """ Standard hkdf algorithm @@ -368,7 +366,7 @@ class AwsIdp: @staticmethod def __long_to_hex(long_num): - return '%x' % long_num + return f'{long_num:x}' @staticmethod def __hex_to_long(hex_string): @@ -399,9 +397,9 @@ class AwsIdp: else: hash_str = long_int if len(hash_str) % 2 == 1: - hash_str = '0%s' % hash_str + hash_str = f'0{hash_str}' elif hash_str[0] in '89ABCDEFabcdef': - hash_str = '00%s' % hash_str + hash_str = f'00{hash_str}' return hash_str @staticmethod @@ -423,11 +421,10 @@ class AwsIdp: days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] time_now = dt.datetime.now(dt.timezone.utc) - format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day) - time_string = time_now.strftime(format_string) - return time_string + format_string = f'{days[time_now.weekday()]} {months[time_now.month]} {time_now.day} %H:%M:%S UTC %Y' + return time_now.strftime(format_string) def 
__str__(self): - return "AWS IDP Client for:\nRegion: %s\nPoolId: %s\nAppId: %s" % ( - self.region, self.pool_id.split("_")[1], self.client_id + return 'AWS IDP Client for:\nRegion: {}\nPoolId: {}\nAppId: {}'.format( + self.region, self.pool_id.split('_')[1], self.client_id, ) diff --git a/yt_dlp/extractor/gopro.py b/yt_dlp/extractor/gopro.py index ec1595b..9142566 100644 --- a/yt_dlp/extractor/gopro.py +++ b/yt_dlp/extractor/gopro.py @@ -23,7 +23,7 @@ class GoProIE(InfoExtractor): 'upload_date': '20210919', 'uploader_id': 'fireydive30018', 'duration': 396062, - } + }, }, { 'url': 'https://gopro.com/v/KRm6Vgp2peg4e', 'info_dict': { @@ -36,7 +36,7 @@ class GoProIE(InfoExtractor): 'uploader_id': 'dc9bcb8b-47d2-47c6-afbc-4c48f9a3769e', 'duration': 45187, 'track': 'The Sky Machine', - } + }, }, { 'url': 'https://gopro.com/v/kVrK9wlJvBMwn', 'info_dict': { @@ -50,7 +50,7 @@ class GoProIE(InfoExtractor): 'duration': 313075, 'track': 'Battery (Live)', 'artist': 'Metallica', - } + }, }] def _real_extract(self, url): @@ -62,7 +62,7 @@ class GoProIE(InfoExtractor): video_info = metadata['collectionMedia'][0] media_data = self._download_json( - 'https://api.gopro.com/media/%s/download' % video_info['id'], video_id) + 'https://api.gopro.com/media/{}/download'.format(video_info['id']), video_id) formats = [] for fmt in try_get(media_data, lambda x: x['_embedded']['variations']) or []: diff --git a/yt_dlp/extractor/goshgay.py b/yt_dlp/extractor/goshgay.py index 9a1f32b..7bcac9b 100644 --- a/yt_dlp/extractor/goshgay.py +++ b/yt_dlp/extractor/goshgay.py @@ -1,7 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, -) from ..utils import ( parse_duration, ) @@ -19,7 +18,7 @@ class GoshgayIE(InfoExtractor): 'thumbnail': r're:^http://.*\.jpg$', 'duration': 80, 'age_limit': 18, - } + }, } def _real_extract(self, url): @@ -32,7 +31,7 @@ class GoshgayIE(InfoExtractor): r'<span class="duration">\s*-?\s*(.*?)</span>', webpage, 
'duration', fatal=False)) - flashvars = compat_parse_qs(self._html_search_regex( + flashvars = urllib.parse.parse_qs(self._html_search_regex( r'<embed.+?id="flash-player-embed".+?flashvars="([^"]+)"', webpage, 'flashvars')) thumbnail = flashvars.get('url_bigthumb', [None])[0] diff --git a/yt_dlp/extractor/gotostage.py b/yt_dlp/extractor/gotostage.py index 9c1a6cb..e47a8ea 100644 --- a/yt_dlp/extractor/gotostage.py +++ b/yt_dlp/extractor/gotostage.py @@ -1,7 +1,6 @@ import json from .common import InfoExtractor -from ..compat import compat_str from ..utils import try_get, url_or_none @@ -15,8 +14,8 @@ class GoToStageIE(InfoExtractor): 'ext': 'mp4', 'title': 'What is GoToStage?', 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 93.924711 - } + 'duration': 93.924711, + }, }, { 'url': 'https://www.gotostage.com/channel/bacc3d3535b34bafacc3f4ef8d4df78a/recording/831e74cd3e0042be96defba627b6f676/watch?source=HOMEPAGE', 'only_matching': True, @@ -25,7 +24,7 @@ class GoToStageIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) metadata = self._download_json( - 'https://api.gotostage.com/contents?ids=%s' % video_id, + f'https://api.gotostage.com/contents?ids={video_id}', video_id, note='Downloading video metadata', errnote='Unable to download video metadata')[0] @@ -36,7 +35,7 @@ class GoToStageIE(InfoExtractor): 'productReferenceKey': metadata['productRefKey'], 'firstName': 'foo', 'lastName': 'bar', - 'email': 'foobar@example.com' + 'email': 'foobar@example.com', } registration_response = self._download_json( @@ -49,7 +48,7 @@ class GoToStageIE(InfoExtractor): errnote='Unable to register user') content_response = self._download_json( - 'https://api.gotostage.com/contents/%s/asset' % video_id, + f'https://api.gotostage.com/contents/{video_id}/asset', video_id, headers={'x-registrantkey': registration_response['registrationKey']}, note='Get download url', @@ -57,11 +56,11 @@ class GoToStageIE(InfoExtractor): return { 'id': video_id, - 'title': 
try_get(metadata, lambda x: x['title'], compat_str), - 'url': try_get(content_response, lambda x: x['cdnLocation'], compat_str), + 'title': try_get(metadata, lambda x: x['title'], str), + 'url': try_get(content_response, lambda x: x['cdnLocation'], str), 'ext': 'mp4', 'thumbnail': url_or_none(try_get(metadata, lambda x: x['thumbnail']['location'])), 'duration': try_get(metadata, lambda x: x['duration'], float), - 'categories': [try_get(metadata, lambda x: x['category'], compat_str)], - 'is_live': False + 'categories': [try_get(metadata, lambda x: x['category'], str)], + 'is_live': False, } diff --git a/yt_dlp/extractor/gputechconf.py b/yt_dlp/extractor/gputechconf.py index 2d13bf4..f31791a 100644 --- a/yt_dlp/extractor/gputechconf.py +++ b/yt_dlp/extractor/gputechconf.py @@ -11,7 +11,7 @@ class GPUTechConfIE(InfoExtractor): 'ext': 'mp4', 'title': 'Coordinating More Than 3 Million CUDA Threads for Social Network Analysis', 'duration': 1219, - } + }, } def _real_extract(self, url): @@ -27,6 +27,6 @@ class GPUTechConfIE(InfoExtractor): return { '_type': 'url_transparent', 'id': video_id, - 'url': '%sxml/%s.xml' % (root_path, xml_file_id), + 'url': f'{root_path}xml/{xml_file_id}.xml', 'ie_key': 'DigitallySpeaking', } diff --git a/yt_dlp/extractor/graspop.py b/yt_dlp/extractor/graspop.py new file mode 100644 index 0000000..09371f8 --- /dev/null +++ b/yt_dlp/extractor/graspop.py @@ -0,0 +1,32 @@ +from .common import InfoExtractor +from ..utils import update_url, url_or_none +from ..utils.traversal import traverse_obj + + +class GraspopIE(InfoExtractor): + _VALID_URL = r'https?://vod\.graspop\.be/[a-z]{2}/(?P<id>\d+)/' + _TESTS = [{ + 'url': 'https://vod.graspop.be/fr/101556/thy-art-is-murder-concert/', + 'info_dict': { + 'id': '101556', + 'ext': 'mp4', + 'title': 'Thy Art Is Murder', + 'thumbnail': r're:https://cdn-mds\.pickx\.be/festivals/v3/global/original/.+\.jpg', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + metadata = 
self._download_json( + f'https://tv.proximus.be/MWC/videocenter/festivals/{video_id}/stream', video_id) + + return { + 'id': video_id, + 'formats': self._extract_m3u8_formats( + # Downgrade manifest request to avoid incomplete certificate chain error + update_url(metadata['source']['assetUri'], scheme='http'), video_id, 'mp4'), + **traverse_obj(metadata, { + 'title': ('name', {str}), + 'thumbnail': ('source', 'poster', {url_or_none}), + }), + } diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py index 1ae0a68..1668900 100644 --- a/yt_dlp/extractor/gronkh.py +++ b/yt_dlp/extractor/gronkh.py @@ -24,7 +24,7 @@ class GronkhIE(InfoExtractor): 'chapters': 'count:3', 'duration': 31463, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://gronkh.tv/stream/536', 'info_dict': { @@ -36,24 +36,24 @@ class GronkhIE(InfoExtractor): 'upload_date': '20211001', 'duration': 32058, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://gronkh.tv/watch/stream/546', 'only_matching': True, }] def _real_extract(self, url): - id = self._match_id(url) - data_json = self._download_json(f'https://api.gronkh.tv/v1/video/info?episode={id}', id) - m3u8_url = self._download_json(f'https://api.gronkh.tv/v1/video/playlist?episode={id}', id)['playlist_url'] - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) + video_id = self._match_id(url) + data_json = self._download_json(f'https://api.gronkh.tv/v1/video/info?episode={video_id}', video_id) + m3u8_url = self._download_json(f'https://api.gronkh.tv/v1/video/playlist?episode={video_id}', video_id)['playlist_url'] + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id) if data_json.get('vtt_url'): subtitles.setdefault('en', []).append({ 'url': data_json['vtt_url'], 'ext': 'vtt', }) return { - 'id': id, + 'id': video_id, 'title': data_json.get('title'), 'view_count': data_json.get('views'), 
'thumbnail': data_json.get('preview_url'), diff --git a/yt_dlp/extractor/groupon.py b/yt_dlp/extractor/groupon.py index c1cbda3..a05fab1 100644 --- a/yt_dlp/extractor/groupon.py +++ b/yt_dlp/extractor/groupon.py @@ -50,8 +50,7 @@ class GrouponIE(InfoExtractor): url_pattern, ie_key = self._PROVIDERS.get(provider.lower()) if not url_pattern: self.report_warning( - '%s: Unsupported video provider %s, skipping video' % - (playlist_id, provider)) + f'{playlist_id}: Unsupported video provider {provider}, skipping video') continue entries.append(self.url_result(url_pattern % video_id, ie_key)) diff --git a/yt_dlp/extractor/harpodeon.py b/yt_dlp/extractor/harpodeon.py index 46eaddb..aa3b2ca 100644 --- a/yt_dlp/extractor/harpodeon.py +++ b/yt_dlp/extractor/harpodeon.py @@ -15,7 +15,7 @@ class HarpodeonIE(InfoExtractor): 'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77', 'creator': 'Vitagraph Company of America', 'release_year': 1915, - } + }, }, { 'url': 'https://www.harpodeon.com/preview/The_Smoking_Out_of_Bella_Butts/268068288', 'md5': '6dfea5412845f690c7331be703f884db', @@ -26,7 +26,7 @@ class HarpodeonIE(InfoExtractor): 'description': 'md5:47e16bdb41fc8a79c83ab83af11c8b77', 'creator': 'Vitagraph Company of America', 'release_year': 1915, - } + }, }, { 'url': 'https://www.harpodeon.com/preview/Behind_the_Screen/421838710', 'md5': '7979df9ca04637282cb7d172ab3a9c3b', @@ -37,7 +37,7 @@ class HarpodeonIE(InfoExtractor): 'description': 'md5:008972a3dc51fba3965ee517d2ba9155', 'creator': 'Lone Star Corporation', 'release_year': 1916, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py index 2551cff..34cff45 100644 --- a/yt_dlp/extractor/hbo.py +++ b/yt_dlp/extractor/hbo.py @@ -3,6 +3,7 @@ import re from .common import InfoExtractor from ..utils import ( int_or_none, + join_nonempty, parse_duration, urljoin, xpath_element, @@ -56,7 +57,7 @@ class HBOBaseIE(InfoExtractor): episode_title = title = xpath_text(video_data, 
'title', fatal=True) series = xpath_text(video_data, 'program') if series: - title = '%s - %s' % (series, title) + title = f'{series} - {title}' formats = [] for source in xpath_element(video_data, 'videos', 'sources', True): @@ -69,7 +70,7 @@ class HBOBaseIE(InfoExtractor): height = format_info.get('height') fmt = { 'url': path, - 'format_id': 'http%s' % ('-%dp' % height if height else ''), + 'format_id': join_nonempty('http'. height and f'{height}p'), 'width': format_info.get('width'), 'height': height, } @@ -107,7 +108,7 @@ class HBOBaseIE(InfoExtractor): else: format_info = self._FORMATS_INFO.get(source.tag, {}) formats.append({ - 'format_id': 'http-%s' % source.tag, + 'format_id': f'http-{source.tag}', 'url': video_url, 'width': format_info.get('width'), 'height': format_info.get('height'), @@ -133,7 +134,7 @@ class HBOBaseIE(InfoExtractor): subtitles = { 'en': [{ 'url': caption_url, - 'ext': 'ttml' + 'ext': 'ttml', }], } diff --git a/yt_dlp/extractor/heise.py b/yt_dlp/extractor/heise.py index 27d737c..01b700b 100644 --- a/yt_dlp/extractor/heise.py +++ b/yt_dlp/extractor/heise.py @@ -105,7 +105,7 @@ class HeiseIE(InfoExtractor): 'description': 'md5:fa164d8c8707dff124a9626d39205f5d', 'timestamp': 1414825200, 'upload_date': '20141101', - } + }, }, { 'url': 'http://www.heise.de/ct/artikel/c-t-uplink-3-3-Owncloud-Tastaturen-Peilsender-Smartphone-2403911.html', 'only_matching': True, @@ -156,7 +156,7 @@ class HeiseIE(InfoExtractor): r'entry-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura id', default=None, group='id') if kaltura_id: - return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id) + return _make_kaltura_result(f'kaltura:2238431:{kaltura_id}') yt_urls = tuple(YoutubeIE._extract_embed_urls(url, webpage)) if yt_urls: @@ -191,7 +191,7 @@ class HeiseIE(InfoExtractor): formats.append({ 'url': video_url, 'format_note': label, - 'format_id': '%s_%s' % (ext, label), + 'format_id': f'{ext}_{label}', 'height': height, }) diff --git 
a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index df6868d..0cbe991 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -57,7 +57,7 @@ class HiDiveIE(InfoExtractor): 'profileId': profile_id, 'hash': self._search_regex( r'\<button [^>]+?data-hash="(\w+)"', login_webpage, 'profile id hash'), - 'returnUrl': '/dashboard' + 'returnUrl': '/dashboard', })) def _call_api(self, video_id, title, key, data={}, **kwargs): @@ -80,7 +80,7 @@ class HiDiveIE(InfoExtractor): self.raise_geo_restricted() if restriction and restriction != 'None': raise ExtractorError( - '%s said: %s' % (self.IE_NAME, restriction), expected=True) + f'{self.IE_NAME} said: {restriction}', expected=True) formats, parsed_urls = [], {None} for rendition_id, rendition in settings['renditions'].items(): @@ -115,5 +115,5 @@ class HiDiveIE(InfoExtractor): self._search_regex(r's(\d+)', key, 'season number', default=None)), 'episode_number': int_or_none( self._search_regex(r'e(\d+)', key, 'episode number', default=None)), - 'http_headers': {'Referer': url} + 'http_headers': {'Referer': url}, } diff --git a/yt_dlp/extractor/historicfilms.py b/yt_dlp/extractor/historicfilms.py index c428fee..714f651 100644 --- a/yt_dlp/extractor/historicfilms.py +++ b/yt_dlp/extractor/historicfilms.py @@ -33,7 +33,7 @@ class HistoricFilmsIE(InfoExtractor): duration = parse_duration(self._html_search_meta( 'duration', webpage, 'duration')) - video_url = 'http://www.historicfilms.com/video/%s_%s_web.mov' % (tape_id, video_id) + video_url = f'http://www.historicfilms.com/video/{tape_id}_{video_id}_web.mov' return { 'id': video_id, diff --git a/yt_dlp/extractor/hitrecord.py b/yt_dlp/extractor/hitrecord.py index 902af44..3c3d7f9 100644 --- a/yt_dlp/extractor/hitrecord.py +++ b/yt_dlp/extractor/hitrecord.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( clean_html, float_or_none, @@ -27,14 +26,14 @@ class HitRecordIE(InfoExtractor): 
'like_count': int, 'comment_count': int, 'tags': list, - } + }, } def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( - 'https://hitrecord.org/api/web/records/%s' % video_id, video_id) + f'https://hitrecord.org/api/web/records/{video_id}', video_id) title = video['title'] video_url = video['source_url']['mp4_url'] @@ -46,7 +45,7 @@ class HitRecordIE(InfoExtractor): t['text'] for t in tags_list if isinstance(t, dict) and t.get('text') - and isinstance(t['text'], compat_str)] + and isinstance(t['text'], str)] return { 'id': video_id, @@ -56,9 +55,9 @@ class HitRecordIE(InfoExtractor): 'duration': float_or_none(video.get('duration'), 1000), 'timestamp': int_or_none(video.get('created_at_i')), 'uploader': try_get( - video, lambda x: x['user']['username'], compat_str), + video, lambda x: x['user']['username'], str), 'uploader_id': try_get( - video, lambda x: compat_str(x['user']['id'])), + video, lambda x: str(x['user']['id'])), 'view_count': int_or_none(video.get('total_views_count')), 'like_count': int_or_none(video.get('hearts_count')), 'comment_count': int_or_none(video.get('comments_count')), diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py index 099c2a1..3998abc 100644 --- a/yt_dlp/extractor/hketv.py +++ b/yt_dlp/extractor/hketv.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, clean_html, @@ -45,9 +44,6 @@ class HKETVIE(InfoExtractor): 'duration': 907, 'subtitles': {}, }, - 'params': { - 'geo_verification_proxy': '<HK proxy here>', - }, 'skip': 'Geo restricted to HK', }] @@ -144,7 +140,7 @@ class HKETVIE(InfoExtractor): if not isinstance(track, dict): continue track_kind = str_or_none(track.get('kind')) - if not track_kind or not isinstance(track_kind, compat_str): + if not track_kind or not isinstance(track_kind, str): continue if track_kind.lower() not in ('captions', 'subtitles'): continue diff --git 
a/yt_dlp/extractor/hollywoodreporter.py b/yt_dlp/extractor/hollywoodreporter.py index 1f7eb89..52db5e5 100644 --- a/yt_dlp/extractor/hollywoodreporter.py +++ b/yt_dlp/extractor/hollywoodreporter.py @@ -53,7 +53,7 @@ class HollywoodReporterPlaylistIE(InfoExtractor): 'info_dict': { 'id': '57822', 'title': 'heat-vision-breakdown', - } + }, }] def _fetch_page(self, slug, pl_id, page): diff --git a/yt_dlp/extractor/holodex.py b/yt_dlp/extractor/holodex.py index a2b73ec..00b045e 100644 --- a/yt_dlp/extractor/holodex.py +++ b/yt_dlp/extractor/holodex.py @@ -46,7 +46,7 @@ class HolodexIE(InfoExtractor): 'url': 'https://holodex.net/watch/_m2mQyaofjI?foo=bar&playlist=69', 'info_dict': { 'id': '69', - 'title': '拿著金斧頭的藍髮大姊姊' + 'title': '拿著金斧頭的藍髮大姊姊', }, 'playlist_count': 3, }, { diff --git a/yt_dlp/extractor/hotnewhiphop.py b/yt_dlp/extractor/hotnewhiphop.py index 4f506cd..8573e89 100644 --- a/yt_dlp/extractor/hotnewhiphop.py +++ b/yt_dlp/extractor/hotnewhiphop.py @@ -1,5 +1,6 @@ +import base64 + from .common import InfoExtractor -from ..compat import compat_b64decode from ..networking import HEADRequest, Request from ..utils import ExtractorError, urlencode_postdata @@ -13,8 +14,8 @@ class HotNewHipHopIE(InfoExtractor): 'info_dict': { 'id': '1435540', 'ext': 'mp3', - 'title': 'Freddie Gibbs - Lay It Down' - } + 'title': 'Freddie Gibbs - Lay It Down', + }, } def _real_extract(self, url): @@ -42,7 +43,7 @@ class HotNewHipHopIE(InfoExtractor): if 'mediaKey' not in mkd: raise ExtractorError('Did not get a media key') - redirect_url = compat_b64decode(video_url_base64).decode('utf-8') + redirect_url = base64.b64decode(video_url_base64).decode('utf-8') redirect_req = HEADRequest(redirect_url) req = self._request_webpage( redirect_req, video_id, diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index a3a3c20..e97740c 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -6,7 +6,6 @@ import time import uuid from .common import 
InfoExtractor -from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -32,7 +31,7 @@ class HotStarBaseIE(InfoExtractor): def _call_api_impl(self, path, video_id, query, st=None, cookies=None): st = int_or_none(st) or int(time.time()) exp = st + 6000 - auth = 'st=%d~exp=%d~acl=/*' % (st, exp) + auth = f'st={st}~exp={exp}~acl=/*' auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest() if cookies and cookies.get('userUP'): @@ -41,7 +40,7 @@ class HotStarBaseIE(InfoExtractor): token = self._download_json( f'{self._API_URL}/um/v3/users', video_id, note='Downloading token', - data=json.dumps({"device_ids": [{"id": compat_str(uuid.uuid4()), "type": "device_id"}]}).encode('utf-8'), + data=json.dumps({'device_ids': [{'id': str(uuid.uuid4()), 'type': 'device_id'}]}).encode(), headers={ 'hotstarauth': auth, 'x-hs-platform': 'PCTV', # or 'web' @@ -66,7 +65,7 @@ class HotStarBaseIE(InfoExtractor): return self._call_api_impl( f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={ 'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265', - 'device-id': cookies.get('device_id').value if cookies.get('device_id') else compat_str(uuid.uuid4()), + 'device-id': cookies.get('device_id').value if cookies.get('device_id') else str(uuid.uuid4()), 'os-name': 'Windows', 'os-version': '10', }) @@ -122,7 +121,7 @@ class HotStarIE(HotStarBaseIE): 'season_id': '6771', 'episode': 'Janhvi Targets Suman', 'episode_number': 8, - } + }, }, { 'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/anupama-anuj-share-a-moment/1000282843', 'info_dict': { @@ -257,7 +256,6 @@ class HotStarIE(HotStarBaseIE): for key, prefix in self._IGNORE_MAP.items() for ignore in self._configuration_arg(key)): continue - tag_dict = dict((t.split(':', 1) + [None])[:2] for t in 
tags.split(';')) format_url = url_or_none(playback_set.get('playbackUrl')) if not format_url: @@ -286,6 +284,7 @@ class HotStarIE(HotStarBaseIE): geo_restricted = True continue + tag_dict = dict((*t.split(':', 1), None)[:2] for t in tags.split(';')) if tag_dict.get('encryption') not in ('plain', None): for f in current_formats: f['has_drm'] = True diff --git a/yt_dlp/extractor/hrfensehen.py b/yt_dlp/extractor/hrfensehen.py index 35e9f67..17673d5 100644 --- a/yt_dlp/extractor/hrfensehen.py +++ b/yt_dlp/extractor/hrfensehen.py @@ -24,17 +24,17 @@ class HRFernsehenIE(InfoExtractor): 'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / ' 'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music', 'subtitles': {'de': [{ - 'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt' + 'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt', }]}, 'timestamp': 1598400000, 'upload_date': '20200826', 'thumbnail': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9.jpg', 'title': 'hessenschau vom 26.08.2020', - 'duration': 1654 - } + 'duration': 1654, + }, }, { 'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html', - 'only_matching': True + 'only_matching': True, }] _GEO_COUNTRIES = ['DE'] @@ -74,7 +74,7 @@ class HRFernsehenIE(InfoExtractor): subtitle = traverse_obj(loader_data, ('mediaCollection', 'subTitles', 0, 'sources', 0, 'url')) - info = { + return { 'id': video_id, 'title': title, 'description': description, @@ -86,5 +86,3 @@ class HRFernsehenIE(InfoExtractor): loader_data, ('playerConfig', 'pluginData', 'trackingAti@all', 'richMedia', 'duration'))), 'thumbnail': self._search_regex(r'thumbnailUrl\W*([^"]+)', webpage, 'thumbnail', default=None), } - - return info diff --git a/yt_dlp/extractor/hrti.py 
b/yt_dlp/extractor/hrti.py index 41d50d0..84e3867 100644 --- a/yt_dlp/extractor/hrti.py +++ b/yt_dlp/extractor/hrti.py @@ -28,21 +28,21 @@ class HRTiBaseIE(InfoExtractor): def _initialize_pre_login(self): init_data = { - 'application_publication_id': self._APP_PUBLICATION_ID + 'application_publication_id': self._APP_PUBLICATION_ID, } uuid = self._download_json( self._API_URL, None, note='Downloading uuid', errnote='Unable to download uuid', - data=json.dumps(init_data).encode('utf-8'))['uuid'] + data=json.dumps(init_data).encode())['uuid'] app_data = { 'uuid': uuid, 'application_publication_id': self._APP_PUBLICATION_ID, - 'application_version': self._APP_VERSION + 'application_version': self._APP_VERSION, } - req = Request(self._API_URL, data=json.dumps(app_data).encode('utf-8')) + req = Request(self._API_URL, data=json.dumps(app_data).encode()) req.get_method = lambda: 'PUT' resources = self._download_json( @@ -71,17 +71,17 @@ class HRTiBaseIE(InfoExtractor): try: auth_info = self._download_json( self._login_url, None, note='Logging in', errnote='Unable to log in', - data=json.dumps(auth_data).encode('utf-8')) + data=json.dumps(auth_data).encode()) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 406: - auth_info = self._parse_json(e.cause.response.read().encode('utf-8'), None) + auth_info = self._parse_json(e.cause.response.read().encode(), None) else: raise error_message = auth_info.get('error', {}).get('message') if error_message: raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error_message), + f'{self.IE_NAME} said: {error_message}', expected=True) self._token = auth_info['secure_streaming_token'] @@ -133,7 +133,7 @@ class HRTiIE(HRTiBaseIE): display_id = mobj.group('display_id') or video_id video = self._download_json( - '%s/video_id/%s/format/json' % (self._search_url, video_id), + f'{self._search_url}/video_id/{video_id}/format/json', display_id, 'Downloading video metadata JSON')['video'][0] title_info = 
video['title'] @@ -188,13 +188,13 @@ class HRTiPlaylistIE(HRTiBaseIE): display_id = mobj.group('display_id') or category_id response = self._download_json( - '%s/category_id/%s/format/json' % (self._search_url, category_id), + f'{self._search_url}/category_id/{category_id}/format/json', display_id, 'Downloading video metadata JSON') video_ids = try_get( response, lambda x: x['video_listings'][0]['alternatives'][0]['list'], list) or [video['id'] for video in response.get('videos', []) if video.get('id')] - entries = [self.url_result('hrti:%s' % video_id) for video_id in video_ids] + entries = [self.url_result(f'hrti:{video_id}') for video_id in video_ids] return self.playlist_result(entries, category_id, display_id) diff --git a/yt_dlp/extractor/hse.py b/yt_dlp/extractor/hse.py index 3cb21d2..d900429 100644 --- a/yt_dlp/extractor/hse.py +++ b/yt_dlp/extractor/hse.py @@ -39,7 +39,7 @@ class HSEShowIE(HSEShowBaseInfoExtractor): 'timestamp': 1638810000, 'upload_date': '20211206', 'channel': 'HSE24', - 'uploader': 'Arina Pirayesh' + 'uploader': 'Arina Pirayesh', }, 'params': {'skip_download': 'm3u8'}, }] @@ -72,7 +72,7 @@ class HSEProductIE(HSEShowBaseInfoExtractor): 'id': '408630', 'ext': 'mp4', 'title': 'Hose im Ponte-Mix', - 'uploader': 'Judith Williams' + 'uploader': 'Judith Williams', }, 'params': {'skip_download': 'm3u8'}, }] diff --git a/yt_dlp/extractor/huajiao.py b/yt_dlp/extractor/huajiao.py index c498fa3..093ce7d 100644 --- a/yt_dlp/extractor/huajiao.py +++ b/yt_dlp/extractor/huajiao.py @@ -22,7 +22,7 @@ class HuajiaoIE(InfoExtractor): 'upload_date': '20161007', 'uploader': 'Penny_余姿昀', 'uploader_id': '75206005', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/huffpost.py b/yt_dlp/extractor/huffpost.py index 69fdc34..156ddeb 100644 --- a/yt_dlp/extractor/huffpost.py +++ b/yt_dlp/extractor/huffpost.py @@ -40,7 +40,7 @@ class HuffPostIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - api_url = 
'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id + api_url = f'http://embed.live.huffingtonpost.com/api/segments/{video_id}.json' data = self._download_json(api_url, video_id)['data'] video_title = data['title'] diff --git a/yt_dlp/extractor/hungama.py b/yt_dlp/extractor/hungama.py index 7da8aad..a687b12 100644 --- a/yt_dlp/extractor/hungama.py +++ b/yt_dlp/extractor/hungama.py @@ -98,7 +98,7 @@ class HungamaIE(HungamaBaseIE): 'en': [{ 'url': video_json['sub_title'], 'ext': 'vtt', - }] + }], } if video_json.get('sub_title') else None, } @@ -136,7 +136,7 @@ class HungamaSongIE(InfoExtractor): audio_id = self._match_id(url) data = self._download_json( - 'https://www.hungama.com/audio-player-data/track/%s' % audio_id, + f'https://www.hungama.com/audio-player-data/track/{audio_id}', audio_id, query={'_country': 'IN'})[0] track = data['song_name'] artist = data.get('singer_name') @@ -153,7 +153,7 @@ class HungamaSongIE(InfoExtractor): 'acodec': media_type, }) - title = '%s - %s' % (artist, track) if artist else track + title = f'{artist} - {track}' if artist else track thumbnail = data.get('img_src') or data.get('album_image') return { diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py index 5379b54..5663a78 100644 --- a/yt_dlp/extractor/huya.py +++ b/yt_dlp/extractor/huya.py @@ -1,9 +1,10 @@ +import base64 import hashlib import random import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_b64decode, compat_urlparse from ..utils import ( ExtractorError, int_or_none, @@ -32,7 +33,7 @@ class HuyaLiveIE(InfoExtractor): }, }, { 'url': 'https://www.huya.com/xiaoyugame', - 'only_matching': True + 'only_matching': True, }] _RESOLUTION = { @@ -46,8 +47,8 @@ class HuyaLiveIE(InfoExtractor): }, '流畅': { 'width': 800, - 'height': 480 - } + 'height': 480, + }, } def _real_extract(self, url): @@ -70,7 +71,7 @@ class HuyaLiveIE(InfoExtractor): continue stream_name = stream_info.get('sStreamName') re_secret = 
not screen_type and live_source_type in (0, 8, 13) - params = dict(compat_urlparse.parse_qsl(unescapeHTML(stream_info['sFlvAntiCode']))) + params = dict(urllib.parse.parse_qsl(unescapeHTML(stream_info['sFlvAntiCode']))) fm, ss = '', '' if re_secret: fm, ss = self.encrypt(params, stream_info, stream_name) @@ -127,6 +128,6 @@ class HuyaLiveIE(InfoExtractor): 'uuid': int_or_none(ct % 1e7 * 1e6 % 0xffffffff), 't': '100', }) - fm = compat_b64decode(params['fm']).decode().split('_', 1)[0] + fm = base64.b64decode(params['fm']).decode().split('_', 1)[0] ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']])) return fm, ss diff --git a/yt_dlp/extractor/hypem.py b/yt_dlp/extractor/hypem.py index 54db7b3..204a72e 100644 --- a/yt_dlp/extractor/hypem.py +++ b/yt_dlp/extractor/hypem.py @@ -14,7 +14,7 @@ class HypemIE(InfoExtractor): 'uploader': 'BODYWORK', 'timestamp': 1371810457, 'upload_date': '20130621', - } + }, } def _real_extract(self, url): @@ -30,9 +30,9 @@ class HypemIE(InfoExtractor): title = track['song'] final_url = self._download_json( - 'http://hypem.com/serve/source/%s/%s' % (track_id, track['key']), + 'http://hypem.com/serve/source/{}/{}'.format(track_id, track['key']), track_id, 'Downloading metadata', headers={ - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', })['url'] return { diff --git a/yt_dlp/extractor/hypergryph.py b/yt_dlp/extractor/hypergryph.py index 96e452a..1fb2e9a 100644 --- a/yt_dlp/extractor/hypergryph.py +++ b/yt_dlp/extractor/hypergryph.py @@ -12,7 +12,7 @@ class MonsterSirenHypergryphMusicIE(InfoExtractor): 'artists': ['塞壬唱片-MSR'], 'album': 'Flame Shadow', 'title': 'Flame Shadow', - } + }, }] def _real_extract(self, url): @@ -28,5 +28,5 @@ class MonsterSirenHypergryphMusicIE(InfoExtractor): 'ext': 'wav', 'vcodec': 'none', 'artists': traverse_obj(json_data, ('player', 'songDetail', 'artists', ...)), - 'album': traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name')) + 'album': 
traverse_obj(json_data, ('musicPlay', 'albumDetail', 'name')), } diff --git a/yt_dlp/extractor/hytale.py b/yt_dlp/extractor/hytale.py index e8cd21a..6956c4e 100644 --- a/yt_dlp/extractor/hytale.py +++ b/yt_dlp/extractor/hytale.py @@ -21,8 +21,8 @@ class HytaleIE(InfoExtractor): 'ext': 'mp4', 'title': 'Avatar Personalization', 'thumbnail': r're:https://videodelivery\.net/\w+/thumbnails/thumbnail\.jpg', - } - }] + }, + }], }, { 'url': 'https://www.hytale.com/news/2019/11/hytale-graphics-update', 'info_dict': { diff --git a/yt_dlp/extractor/icareus.py b/yt_dlp/extractor/icareus.py index d081cf4..3d6e1f9 100644 --- a/yt_dlp/extractor/icareus.py +++ b/yt_dlp/extractor/icareus.py @@ -65,19 +65,19 @@ class IcareusIE(InfoExtractor): }, }, { 'url': 'https://asahitv.fi/fi/web/asahi/player/vod?assetId=89415818', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://hyvinvointitv.fi/fi/web/hyvinvointitv/player/vod?assetId=89149730', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://inez.fi/fi/web/inez-media/player/vod?assetId=71328822', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.permanto.fi/fi/web/alfatv/player/vod?assetId=135497515', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://videos.minifiddlers.org/web/international-minifiddlers/player/vod?assetId=1982759', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -166,7 +166,7 @@ class IcareusIE(InfoExtractor): } thumbnails = info.get('thumbnails') or [{ - 'url': url_or_none(info.get('thumbnail') or assets.get('thumbnail')) + 'url': url_or_none(info.get('thumbnail') or assets.get('thumbnail')), }] return merge_dicts({ diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py index c28d09f..a37cfe7 100644 --- a/yt_dlp/extractor/ichinanalive.py +++ b/yt_dlp/extractor/ichinanalive.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import 
ExtractorError, str_or_none, traverse_obj, unified_strdate @@ -27,21 +26,21 @@ class IchinanaLiveIE(InfoExtractor): @classmethod def suitable(cls, url): - return not IchinanaLiveClipIE.suitable(url) and super(IchinanaLiveIE, cls).suitable(url) + return not IchinanaLiveClipIE.suitable(url) and super().suitable(url) def _real_extract(self, url): video_id = self._match_id(url) - url = 'https://17.live/live/%s' % video_id + url = f'https://17.live/live/{video_id}' enter = self._download_json( - 'https://api-dsa.17app.co/api/v1/lives/%s/enter' % video_id, video_id, + f'https://api-dsa.17app.co/api/v1/lives/{video_id}/enter', video_id, headers={'Referer': url}, fatal=False, expected_status=420, data=b'\0') if enter and enter.get('message') == 'ended': raise ExtractorError('This live has ended.', expected=True) view_data = self._download_json( - 'https://api-dsa.17app.co/api/v1/lives/%s' % video_id, video_id, + f'https://api-dsa.17app.co/api/v1/lives/{video_id}', video_id, headers={'Referer': url}) uploader = traverse_obj( @@ -52,7 +51,7 @@ class IchinanaLiveIE(InfoExtractor): raise ExtractorError('unable to extract live URL information') formats = [] for (name, value) in video_urls[0].items(): - if not isinstance(value, compat_str): + if not isinstance(value, str): continue if not value.startswith('http'): continue @@ -106,10 +105,10 @@ class IchinanaLiveClipIE(InfoExtractor): def _real_extract(self, url): uploader_id, video_id = self._match_valid_url(url).groups() - url = 'https://17.live/profile/r/%s/clip/%s' % (uploader_id, video_id) + url = f'https://17.live/profile/r/{uploader_id}/clip/{video_id}' view_data = self._download_json( - 'https://api-dsa.17app.co/api/v1/clips/%s' % video_id, video_id, + f'https://api-dsa.17app.co/api/v1/clips/{video_id}', video_id, headers={'Referer': url}) uploader = traverse_obj( diff --git a/yt_dlp/extractor/ign.py b/yt_dlp/extractor/ign.py index 1c4f105..771c185 100644 --- a/yt_dlp/extractor/ign.py +++ b/yt_dlp/extractor/ign.py @@ 
-2,12 +2,10 @@ import re import urllib.parse from .common import InfoExtractor -from ..compat import compat_parse_qs from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, determine_ext, - error_to_compat_str, extract_attributes, int_or_none, merge_dicts, @@ -22,7 +20,7 @@ from ..utils import ( class IGNBaseIE(InfoExtractor): def _call_api(self, slug): return self._download_json( - 'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug) + f'http://apis.ign.com/{self._PAGE_TYPE}/v3/{self._PAGE_TYPE}s/slug/{slug}', slug) def _checked_call_api(self, slug): try: @@ -106,8 +104,7 @@ class IGNIE(IGNBaseIE): _VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)' _PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?' _VALID_URL = ( - r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:%s)' - % '|'.join((_VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE))) + r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:{})'.format('|'.join((_VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE)))) IE_NAME = 'ign.com' _PAGE_TYPE = 'video' @@ -152,10 +149,10 @@ class IGNIE(IGNBaseIE): grids = re.findall( r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''', webpage) - return filter(None, - (urljoin(url, m.group('path')) for m in re.finditer( - r'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos%s)\1''' - % cls._VIDEO_PATH_RE, grids[0] if grids else ''))) + return filter( + None, (urljoin(url, m.group('path')) for m in re.finditer( + rf'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos{cls._VIDEO_PATH_RE})\1''', + grids[0] if grids else ''))) def _real_extract(self, url): display_id, filt = self._match_valid_url(url).group('id', 'filt') @@ -224,7 +221,7 @@ class IGNVideoIE(IGNBaseIE): webpage, urlh = self._download_webpage_handle(embed_url, video_id) new_url = urlh.url - ign_url = compat_parse_qs( + ign_url = urllib.parse.parse_qs( urllib.parse.urlparse(new_url).query).get('url', [None])[-1] if 
ign_url: return self.url_result(ign_url, IGNIE.ie_key()) @@ -328,7 +325,7 @@ class IGNArticleIE(IGNBaseIE): 'Content not found: expired?', cause=e.cause, expected=True) elif e.cause.status == 503: - self.report_warning(error_to_compat_str(e.cause)) + self.report_warning(str(e.cause)) return raise @@ -367,7 +364,7 @@ class IGNArticleIE(IGNBaseIE): flashvars = self._search_regex( r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''', m.group('params'), 'flashvars', default='') - flashvars = compat_parse_qs(extract_attributes(flashvars).get('value') or '') + flashvars = urllib.parse.parse_qs(extract_attributes(flashvars).get('value') or '') v_url = url_or_none((flashvars.get('url') or [None])[-1]) if v_url: yield self.url_result(v_url) diff --git a/yt_dlp/extractor/iheart.py b/yt_dlp/extractor/iheart.py index fb6f51e..21870ca 100644 --- a/yt_dlp/extractor/iheart.py +++ b/yt_dlp/extractor/iheart.py @@ -35,7 +35,7 @@ class IHeartRadioIE(IHeartRadioBaseIE): 'description': 'md5:96cc7297b3a5a9ebae28643801c96fae', 'timestamp': 1597741200, 'upload_date': '20200818', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/ilpost.py b/yt_dlp/extractor/ilpost.py index ae98399..2868f0c 100644 --- a/yt_dlp/extractor/ilpost.py +++ b/yt_dlp/extractor/ilpost.py @@ -28,7 +28,7 @@ class IlPostIE(InfoExtractor): 'availability': 'public', 'series_id': '235598', 'description': '', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/iltalehti.py b/yt_dlp/extractor/iltalehti.py index 0e7e82c..37aa471 100644 --- a/yt_dlp/extractor/iltalehti.py +++ b/yt_dlp/extractor/iltalehti.py @@ -47,5 +47,5 @@ class IltalehtiIE(InfoExtractor): 'state', 'articles', ..., 'items', (('main_media', 'properties'), ('body', ..., 'properties')))) video_ids = traverse_obj(props, (lambda _, v: v['provider'] == 'jwplayer', 'id')) return self.playlist_from_matches( - video_ids, article_id, ie='JWPlatform', getter=lambda id: f'jwplatform:{id}', + video_ids, article_id, 
ie='JWPlatform', getter=lambda video_id: f'jwplatform:{video_id}', title=traverse_obj(info, ('state', 'articles', ..., 'items', 'canonical_title'), get_all=False)) diff --git a/yt_dlp/extractor/imdb.py b/yt_dlp/extractor/imdb.py index 557a3b7..a786ce3 100644 --- a/yt_dlp/extractor/imdb.py +++ b/yt_dlp/extractor/imdb.py @@ -28,7 +28,7 @@ class ImdbIE(InfoExtractor): 'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7', 'duration': 152, 'thumbnail': r're:^https?://.+\.jpg', - } + }, }, { 'url': 'https://www.imdb.com/video/vi3516832537', 'info_dict': { @@ -38,7 +38,7 @@ class ImdbIE(InfoExtractor): 'description': 'md5:17fcc4fe11ec29b4399be9d4c5ef126c', 'duration': 153, 'thumbnail': r're:^https?://.+\.jpg', - } + }, }, { 'url': 'http://www.imdb.com/video/_/vi2524815897', 'only_matching': True, @@ -73,7 +73,7 @@ class ImdbIE(InfoExtractor): 'key': base64.b64encode(json.dumps({ 'type': 'VIDEO_PLAYER', 'subType': 'FORCE_LEGACY', - 'id': 'vi%s' % video_id, + 'id': f'vi{video_id}', }).encode()).decode(), }), lambda x: x[0]['videoLegacyEncodings']) quality = qualities(('SD', '480p', '720p', '1080p')) @@ -132,7 +132,7 @@ class ImdbListIE(InfoExtractor): webpage = self._download_webpage(url, list_id) entries = [ self.url_result('http://www.imdb.com' + m, 'Imdb') - for m in re.findall(r'href="(/list/ls%s/videoplayer/vi[^"]+)"' % list_id, webpage)] + for m in re.findall(rf'href="(/list/ls{list_id}/videoplayer/vi[^"]+)"', webpage)] list_title = self._html_search_regex( r'<h1[^>]+class="[^"]*header[^"]*"[^>]*>(.*?)</h1>', diff --git a/yt_dlp/extractor/imggaming.py b/yt_dlp/extractor/imggaming.py index a40aa21..3a7b5bd 100644 --- a/yt_dlp/extractor/imggaming.py +++ b/yt_dlp/extractor/imggaming.py @@ -73,7 +73,7 @@ class ImgGamingBaseIE(InfoExtractor): if not video_id: continue entries.append(self.url_result( - 'https://%s/video/%s' % (domain, video_id), + f'https://{domain}/video/{video_id}', self.ie_key(), video_id)) return self.playlist_result( entries, media_id, 
playlist.get('title'), diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py index f32c116..f0c3419 100644 --- a/yt_dlp/extractor/imgur.py +++ b/yt_dlp/extractor/imgur.py @@ -228,21 +228,18 @@ class ImgurGalleryBaseIE(ImgurBaseIE): if traverse_obj(data, 'is_album'): - def yield_media_ids(): - for m_id in traverse_obj(data, ( - 'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'], - 'id', {lambda x: str_or_none(x) or None})): - yield m_id + items = traverse_obj(data, ( + 'media', lambda _, v: v.get('type') == 'video' or v['metadata']['is_animated'], + 'id', {lambda x: str_or_none(x) or None})) # if a gallery with exactly one video, apply album metadata to video - media_id = ( - self._GALLERY - and traverse_obj(data, ('image_count', {lambda c: c == 1})) - and next(yield_media_ids(), None)) + media_id = None + if self._GALLERY and len(items) == 1: + media_id = items[0] if not media_id: result = self.playlist_result( - map(self._imgur_result, yield_media_ids()), gallery_id) + map(self._imgur_result, items), gallery_id) result.update(info) return result gallery_id = media_id @@ -372,13 +369,13 @@ class ImgurAlbumIE(ImgurGalleryBaseIE): 'url': 'https://imgur.com/a/iX265HX', 'info_dict': { 'id': 'iX265HX', - 'title': 'enen-no-shouboutai' + 'title': 'enen-no-shouboutai', }, 'playlist_count': 2, }, { 'url': 'https://imgur.com/a/8pih2Ed', 'info_dict': { - 'id': '8pih2Ed' + 'id': '8pih2Ed', }, 'playlist_mincount': 1, }] diff --git a/yt_dlp/extractor/ina.py b/yt_dlp/extractor/ina.py index 857013d..ba82201 100644 --- a/yt_dlp/extractor/ina.py +++ b/yt_dlp/extractor/ina.py @@ -14,7 +14,7 @@ class InaIE(InfoExtractor): 'description': 'md5:19f61e2b4844ed4bb2e3df9ab9f527ff', 'upload_date': '20070712', 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/3c4/I12055569.jpeg', - } + }, }, { 'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html', 'only_matching': True, diff --git 
a/yt_dlp/extractor/inc.py b/yt_dlp/extractor/inc.py index 9b3fe9a..f47b8e1 100644 --- a/yt_dlp/extractor/inc.py +++ b/yt_dlp/extractor/inc.py @@ -54,4 +54,4 @@ class IncIE(InfoExtractor): display_id)['vid_kaltura_id'] return self.url_result( - 'kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key()) + f'kaltura:{partner_id}:{kaltura_id}', KalturaIE.ie_key()) diff --git a/yt_dlp/extractor/indavideo.py b/yt_dlp/extractor/indavideo.py index 564bf8a..85e388e 100644 --- a/yt_dlp/extractor/indavideo.py +++ b/yt_dlp/extractor/indavideo.py @@ -80,7 +80,7 @@ class IndavideoEmbedIE(InfoExtractor): height = int_or_none(self._search_regex( r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None)) if not height and len(filesh) == 1: - height = int_or_none(list(filesh.keys())[0]) + height = int_or_none(next(iter(filesh.keys()))) token = filesh.get(str(height)) if token is None: continue @@ -95,7 +95,7 @@ class IndavideoEmbedIE(InfoExtractor): timestamp = parse_iso8601(timestamp + ' +0200', ' ') thumbnails = [{ - 'url': self._proto_relative_url(thumbnail) + 'url': self._proto_relative_url(thumbnail), } for thumbnail in video.get('thumbnails', [])] tags = [tag['title'] for tag in video.get('tags') or []] diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py index 2bb4850..5274c93 100644 --- a/yt_dlp/extractor/infoq.py +++ b/yt_dlp/extractor/infoq.py @@ -1,9 +1,7 @@ +import base64 +import urllib.parse + from .bokecc import BokeCCBaseIE -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote, - compat_urlparse, -) from ..utils import ( ExtractorError, determine_ext, @@ -59,7 +57,7 @@ class InfoQIE(BokeCCBaseIE): encoded_id = self._search_regex( r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None) - real_id = compat_urllib_parse_unquote(compat_b64decode(encoded_id).decode('utf-8')) + real_id = urllib.parse.unquote(base64.b64decode(encoded_id).decode('utf-8')) playpath = 'mp4:' + real_id return [{ @@ -98,7 +96,7 @@ class 
InfoQIE(BokeCCBaseIE): # base URL is found in the Location header in the response returned by # GET https://www.infoq.com/mp3download.action?filename=... when logged in. - http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url) + http_audio_url = urllib.parse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url) http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage)) # audio file seem to be missing some times even if there is a download link diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 46f9cd6..754f710 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -26,9 +26,9 @@ from ..utils import ( _ENCODING_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_' -def _pk_to_id(id): +def _pk_to_id(media_id): """Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id""" - return encode_base_n(int(id.split('_')[0]), table=_ENCODING_CHARS) + return encode_base_n(int(media_id.split('_')[0]), table=_ENCODING_CHARS) def _id_to_pk(shortcode): @@ -113,7 +113,7 @@ class InstagramBaseIE(InfoExtractor): 'height': self._get_dimension('height', node), 'http_headers': { 'Referer': 'https://www.instagram.com/', - } + }, } elif not video_id: continue @@ -148,25 +148,25 @@ class InstagramBaseIE(InfoExtractor): return {} formats = [{ - 'format_id': format.get('id'), - 'url': format.get('url'), - 'width': format.get('width'), - 'height': format.get('height'), + 'format_id': fmt.get('id'), + 'url': fmt.get('url'), + 'width': fmt.get('width'), + 'height': fmt.get('height'), 'vcodec': vcodec, - } for format in videos_list or []] + } for fmt in videos_list or []] if dash_manifest_raw: formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, media_id), mpd_id='dash')) thumbnails = [{ 'url': thumbnail.get('url'), 'width': thumbnail.get('width'), - 'height': 
thumbnail.get('height') + 'height': thumbnail.get('height'), } for thumbnail in traverse_obj(product_media, ('image_versions2', 'candidates')) or []] return { 'id': media_id, 'duration': float_or_none(product_media.get('video_duration')), 'formats': formats, - 'thumbnails': thumbnails + 'thumbnails': thumbnails, } def _extract_product(self, product_info): @@ -188,7 +188,7 @@ class InstagramBaseIE(InfoExtractor): '__post_extractor': self.extract_comments(_pk_to_id(product_info.get('pk'))), 'http_headers': { 'Referer': 'https://www.instagram.com/', - } + }, } carousel_media = product_info.get('carousel_media') if carousel_media: @@ -204,7 +204,7 @@ class InstagramBaseIE(InfoExtractor): return { **info_dict, - **self._extract_product_media(product_info) + **self._extract_product_media(product_info), } def _get_comments(self, video_id): @@ -246,7 +246,7 @@ class InstagramIOSIE(InfoExtractor): 'comment_count': int, 'comments': list, }, - 'add_ie': ['Instagram'] + 'add_ie': ['Instagram'], }] def _real_extract(self, url): @@ -453,7 +453,7 @@ class InstagramIE(InstagramBaseIE): else: self.report_warning('Main webpage is locked behind the login page. 
Retrying with embed webpage (some metadata might be missing).') webpage = self._download_webpage( - f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False) + f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False) or '' additional_data = self._search_json( r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False) if not additional_data and not media: @@ -520,7 +520,7 @@ class InstagramIE(InstagramBaseIE): return { 'id': video_id, 'formats': formats, - 'title': media.get('title') or 'Video by %s' % username, + 'title': media.get('title') or f'Video by {username}', 'description': description, 'duration': float_or_none(media.get('video_duration')), 'timestamp': traverse_obj(media, 'taken_at_timestamp', 'date', expected_type=int_or_none), @@ -534,7 +534,7 @@ class InstagramIE(InstagramBaseIE): 'thumbnails': thumbnails, 'http_headers': { 'Referer': 'https://www.instagram.com/', - } + }, } @@ -567,10 +567,10 @@ class InstagramPlaylistBaseIE(InstagramBaseIE): gis_tmpls = [self._gis_tmpl] else: gis_tmpls = [ - '%s' % rhx_gis, + f'{rhx_gis}', '', - '%s:%s' % (rhx_gis, csrf_token), - '%s:%s:%s' % (rhx_gis, csrf_token, self.get_param('http_headers')['User-Agent']), + f'{rhx_gis}:{csrf_token}', + '{}:{}:{}'.format(rhx_gis, csrf_token, self.get_param('http_headers')['User-Agent']), ] # try all of the ways to generate a GIS query, and not only use the @@ -579,10 +579,10 @@ class InstagramPlaylistBaseIE(InstagramBaseIE): try: json_data = self._download_json( 'https://www.instagram.com/graphql/query/', uploader_id, - 'Downloading JSON page %d' % page_num, headers={ + f'Downloading JSON page {page_num}', headers={ 'X-Requested-With': 'XMLHttpRequest', 'X-Instagram-GIS': hashlib.md5( - ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(), + (f'{gis_tmpl}:{variables}').encode()).hexdigest(), }, query={ 'query_hash': self._QUERY_HASH, 'variables': variables, @@ -635,10 +635,10 @@ class 
InstagramUserIE(InstagramPlaylistBaseIE): 'extract_flat': True, 'skip_download': True, 'playlistend': 5, - } + }, }] - _QUERY_HASH = '42323d64886122307be10013ad2dcc44', + _QUERY_HASH = ('42323d64886122307be10013ad2dcc44',) @staticmethod def _parse_timeline_from(data): @@ -650,7 +650,7 @@ class InstagramUserIE(InstagramPlaylistBaseIE): # returns a dictionary of variables to add to the timeline query based # on the GraphQL of the original page return { - 'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id'] + 'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id'], } @@ -669,10 +669,10 @@ class InstagramTagIE(InstagramPlaylistBaseIE): 'extract_flat': True, 'skip_download': True, 'playlistend': 50, - } + }, }] - _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314', + _QUERY_HASH = ('f92f56d47dc7a55b606908374b43a314',) @staticmethod def _parse_timeline_from(data): @@ -685,7 +685,7 @@ class InstagramTagIE(InstagramPlaylistBaseIE): # on the GraphQL of the original page return { 'tag_name': - data['entry_data']['TagPage'][0]['graphql']['hashtag']['name'] + data['entry_data']['TagPage'][0]['graphql']['hashtag']['name'], } @@ -699,7 +699,7 @@ class InstagramStoryIE(InstagramBaseIE): 'id': '18090946048123978', 'title': 'Rare', }, - 'playlist_mincount': 50 + 'playlist_mincount': 50, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/internazionale.py b/yt_dlp/extractor/internazionale.py index 1b1cb57..3c3ad7e 100644 --- a/yt_dlp/extractor/internazionale.py +++ b/yt_dlp/extractor/internazionale.py @@ -52,8 +52,8 @@ class InternazionaleIE(InfoExtractor): 'video available aboard', default='1', group='value') video_available_abroad = video_available_abroad == '1' - video_base = 'https://video%s.internazionale.it/%s/%s.' 
% \ - ('' if video_available_abroad else '-ita', video_path, video_id) + video_base = 'https://video{}.internazionale.it/{}/{}.'.format( + '' if video_available_abroad else '-ita', video_path, video_id) formats = self._extract_m3u8_formats( video_base + 'm3u8', display_id, 'mp4', diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index d5a3d80..ab26dc5 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -200,8 +200,8 @@ class IPrimaCNNIE(InfoExtractor): 'title': 'md5:277c6b1ed0577e51b40ddd35602ff43e', }, 'params': { - 'skip_download': 'm3u8' - } + 'skip_download': 'm3u8', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index 85ed549..735b446 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -2,10 +2,10 @@ import hashlib import itertools import re import time +import urllib.parse from .common import InfoExtractor from .openload import PhantomJSwrapper -from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_urlencode from ..utils import ( ExtractorError, clean_html, @@ -30,7 +30,7 @@ from ..utils import ( def md5_text(text): - return hashlib.md5(text.encode('utf-8')).hexdigest() + return hashlib.md5(text.encode()).hexdigest() class IqiyiSDK: @@ -41,17 +41,17 @@ class IqiyiSDK: @staticmethod def split_sum(data): - return compat_str(sum(map(lambda p: int(p, 16), list(data)))) + return str(sum(int(p, 16) for p in data)) @staticmethod def digit_sum(num): if isinstance(num, int): - num = compat_str(num) - return compat_str(sum(map(int, num))) + num = str(num) + return str(sum(map(int, num))) def even_odd(self): - even = self.digit_sum(compat_str(self.timestamp)[::2]) - odd = self.digit_sum(compat_str(self.timestamp)[1::2]) + even = self.digit_sum(str(self.timestamp)[::2]) + odd = self.digit_sum(str(self.timestamp)[1::2]) return even, odd def preprocess(self, chunksize): @@ -65,7 +65,7 @@ class IqiyiSDK: def mod(self, 
modulus): chunks, ip = self.preprocess(32) - self.target = chunks[0] + ''.join(map(lambda p: compat_str(p % modulus), ip)) + self.target = chunks[0] + ''.join(str(p % modulus) for p in ip) def split(self, chunksize): modulus_map = { @@ -77,7 +77,7 @@ class IqiyiSDK: chunks, ip = self.preprocess(chunksize) ret = '' for i in range(len(chunks)): - ip_part = compat_str(ip[i] % modulus_map[chunksize]) if i < 4 else '' + ip_part = str(ip[i] % modulus_map[chunksize]) if i < 4 else '' if chunksize == 8: ret += ip_part + chunks[i] else: @@ -104,11 +104,11 @@ class IqiyiSDK: self.target = md5_text(self.target) d = time.localtime(self.timestamp) strings = { - 'y': compat_str(d.tm_year), + 'y': str(d.tm_year), 'm': '%02d' % d.tm_mon, 'd': '%02d' % d.tm_mday, } - self.target += ''.join(map(lambda c: strings[c], list(scheme))) + self.target += ''.join(strings[c] for c in scheme) def split_time_even_odd(self): even, odd = self.even_odd() @@ -120,11 +120,11 @@ class IqiyiSDK: def split_ip_time_sum(self): chunks, ip = self.preprocess(32) - self.target = compat_str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp) + self.target = str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp) def split_time_ip_sum(self): chunks, ip = self.preprocess(32) - self.target = self.digit_sum(self.timestamp) + chunks[0] + compat_str(sum(ip)) + self.target = self.digit_sum(self.timestamp) + chunks[0] + str(sum(ip)) class IqiyiSDKInterpreter: @@ -157,7 +157,7 @@ class IqiyiSDKInterpreter: elif function in other_functions: other_functions[function]() else: - raise ExtractorError('Unknown function %s' % function) + raise ExtractorError(f'Unknown function {function}') return sdk.target @@ -177,7 +177,7 @@ class IqiyiIE(InfoExtractor): 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', 'ext': 'mp4', 'title': '美国德州空中惊现奇异云团 酷似UFO', - } + }, }, { 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', 'md5': 'b7dc800a4004b1b57749d9abae0472da', @@ -249,8 +249,9 @@ class IqiyiIE(InfoExtractor): note='Get token for 
logging', errnote='Unable to get token for logging') sdk = data['sdk'] timestamp = int(time.time()) - target = '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % ( - username, self._rsa_fun(password.encode('utf-8'))) + target = ( + f'/apis/reglogin/login.action?lang=zh_TW&area_code=null&email={username}' + f'&passwd={self._rsa_fun(password.encode())}&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1') interp = IqiyiSDKInterpreter(sdk) sign = interp.run(target, data['ip'], timestamp) @@ -264,7 +265,7 @@ class IqiyiIE(InfoExtractor): 'bird_t': timestamp, } validation_result = self._download_json( - 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params), None, + 'http://kylin.iqiyi.com/validate?' + urllib.parse.urlencode(validation_params), None, note='Validate credentials', errnote='Unable to validate credentials') MSG_MAP = { @@ -276,7 +277,7 @@ class IqiyiIE(InfoExtractor): if code != 'A00000': msg = MSG_MAP.get(code) if not msg: - msg = 'error %s' % code + msg = f'error {code}' if validation_result.get('msg'): msg += ': ' + validation_result['msg'] self.report_warning('unable to log in: ' + msg) @@ -288,7 +289,7 @@ class IqiyiIE(InfoExtractor): tm = int(time.time() * 1000) key = 'd5fb4bd9d50c4be6948c97edd7254b0e' - sc = md5_text(compat_str(tm) + key + tvid) + sc = md5_text(str(tm) + key + tvid) params = { 'tvid': tvid, 'vid': video_id, @@ -298,7 +299,7 @@ class IqiyiIE(InfoExtractor): } return self._download_json( - 'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id), + f'http://cache.m.iqiyi.com/jp/tmts/{tvid}/{video_id}/', video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='), query=params, headers=self.geo_verification_headers()) @@ -321,10 +322,10 @@ class IqiyiIE(InfoExtractor): # Start from 2 because links in the first page are already on webpage for page_num in itertools.count(2): pagelist_page = 
self._download_webpage( - 'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE), + f'http://cache.video.qiyi.com/jp/avlist/{album_id}/{page_num}/{PAGE_SIZE}/', album_id, - note='Download playlist page %d' % page_num, - errnote='Failed to download playlist page %d' % page_num) + note=f'Download playlist page {page_num}', + errnote=f'Failed to download playlist page {page_num}') pagelist = self._parse_json( remove_start(pagelist_page, 'var tvInfoJs='), album_id) vlist = pagelist['data']['vlist'] @@ -367,7 +368,7 @@ class IqiyiIE(InfoExtractor): for stream in data['vidl']: if 'm3utx' not in stream: continue - vd = compat_str(stream['vd']) + vd = str(stream['vd']) formats.append({ 'url': stream['m3utx'], 'format_id': vd, @@ -416,11 +417,11 @@ class IqIE(InfoExtractor): 'params': { 'format': '500', }, - 'expected_warnings': ['format is restricted'] + 'expected_warnings': ['format is restricted'], }, { # VIP-restricted video 'url': 'https://www.iq.com/play/mermaid-in-the-fog-2021-gbdpx13bs4', - 'only_matching': True + 'only_matching': True, }] _BID_TAGS = { '100': '240P', @@ -562,7 +563,7 @@ class IqIE(InfoExtractor): return self._BID_TAGS = { bid: traverse_obj(extracted_bid_tags, (bid, 'value'), expected_type=str, default=self._BID_TAGS.get(bid)) - for bid in extracted_bid_tags.keys() + for bid in extracted_bid_tags } def _get_cookie(self, name, default=None): @@ -580,7 +581,7 @@ class IqIE(InfoExtractor): uid = traverse_obj( self._parse_json( - self._get_cookie('I00002', '{}'), video_id, transform_source=compat_urllib_parse_unquote, fatal=False), + self._get_cookie('I00002', '{}'), video_id, transform_source=urllib.parse.unquote, fatal=False), ('data', 'uid'), default=0) if uid: @@ -590,7 +591,7 @@ class IqIE(InfoExtractor): 'platformId': 3, 'modeCode': self._get_cookie('mod', 'intl'), 'langCode': self._get_cookie('lang', 'en_us'), - 'deviceId': self._get_cookie('QC005', '') + 'deviceId': self._get_cookie('QC005', ''), }, fatal=False) 
ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none) else: @@ -621,7 +622,7 @@ class IqIE(InfoExtractor): preview_time = traverse_obj( initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False) if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none): - self.report_warning('This preview video is limited%s' % format_field(preview_time, None, ' to %s seconds')) + self.report_warning('This preview video is limited{}'.format(format_field(preview_time, None, ' to %s seconds'))) # TODO: Extract audio-only formats for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none)): @@ -672,7 +673,7 @@ class IqIE(InfoExtractor): f.update({ 'quality': qualities(list(self._BID_TAGS.keys()))(bid), 'format_note': self._BID_TAGS[bid], - **parse_resolution(video_format.get('scrsz')) + **parse_resolution(video_format.get('scrsz')), }) formats.extend(extracted_formats) @@ -680,7 +681,7 @@ class IqIE(InfoExtractor): lang = self._LID_TAGS.get(str_or_none(sub_format.get('lid')), sub_format.get('_name')) subtitles.setdefault(lang, []).extend([{ 'ext': format_ext, - 'url': urljoin(initial_format_data.get('dstl', 'http://meta.video.iqiyi.com'), sub_format[format_key]) + 'url': urljoin(initial_format_data.get('dstl', 'http://meta.video.iqiyi.com'), sub_format[format_key]), } for format_key, format_ext in [('srt', 'srt'), ('webvtt', 'vtt')] if sub_format.get(format_key)]) extra_metadata = page_data.get('albumInfo') if video_info.get('albumId') and page_data.get('albumInfo') else video_info @@ -709,9 +710,9 @@ class IqAlbumIE(InfoExtractor): 'info_dict': { 'id': '1bk9icvr331', 'title': 'One Piece', - 'description': 'Subtitle available on Sunday 4PM(GMT+8).' 
+ 'description': 'Subtitle available on Sunday 4PM(GMT+8).', }, - 'playlist_mincount': 238 + 'playlist_mincount': 238, }, { # Movie/single video 'url': 'https://www.iq.com/album/九龙城寨-2021-22yjnij099k', @@ -728,7 +729,7 @@ class IqAlbumIE(InfoExtractor): 'age_limit': 13, 'average_rating': float, }, - 'expected_warnings': ['format is restricted'] + 'expected_warnings': ['format is restricted'], }] def _entries(self, album_id_num, page_ranges, album_id=None, mode_code='intl', lang_code='en_us'): @@ -741,7 +742,7 @@ class IqAlbumIE(InfoExtractor): 'modeCode': mode_code, 'langCode': lang_code, 'endOrder': page_range['to'], - 'startOrder': page_range['from'] + 'startOrder': page_range['from'], }) for video in page['data']['epg']: yield self.url_result('https://www.iq.com/play/%s' % (video.get('playLocSuffix') or video['qipuIdStr']), @@ -754,7 +755,7 @@ class IqAlbumIE(InfoExtractor): album_data = next_data['props']['initialState']['album']['videoAlbumInfo'] if album_data.get('videoType') == 'singleVideo': - return self.url_result('https://www.iq.com/play/%s' % album_id, IqIE.ie_key()) + return self.url_result(f'https://www.iq.com/play/{album_id}', IqIE.ie_key()) return self.playlist_result( self._entries(album_data['albumId'], album_data['totalPageRange'], album_id, traverse_obj(next_data, ('props', 'initialProps', 'pageProps', 'modeCode')), diff --git a/yt_dlp/extractor/islamchannel.py b/yt_dlp/extractor/islamchannel.py index 253a846..f70c3ad 100644 --- a/yt_dlp/extractor/islamchannel.py +++ b/yt_dlp/extractor/islamchannel.py @@ -14,7 +14,7 @@ class IslamChannelIE(InfoExtractor): 'description': 'md5:5cc7ddecef064ea7afe52eb5e0e33b55', 'thumbnail': r're:https?://.+', 'ext': 'mp4', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/israelnationalnews.py b/yt_dlp/extractor/israelnationalnews.py index 35040f5..76e54d9 100644 --- a/yt_dlp/extractor/israelnationalnews.py +++ b/yt_dlp/extractor/israelnationalnews.py @@ -7,7 +7,7 @@ class 
IsraelNationalNewsIE(InfoExtractor): _TESTS = [{ 'url': 'https://www.israelnationalnews.com/news/354520', 'info_dict': { - 'id': '354520' + 'id': '354520', }, 'playlist': [{ 'info_dict': { @@ -34,8 +34,8 @@ class IsraelNationalNewsIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCJdKr0Bgd_5saZYqLCa9mng', 'upload_date': '20220606', 'uploader': 'The Rubin Report', - } - }] + }, + }], }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py index 5d6fbaa..71001c4 100644 --- a/yt_dlp/extractor/itprotv.py +++ b/yt_dlp/extractor/itprotv.py @@ -12,7 +12,7 @@ from ..utils import ( class ITProTVBaseIE(InfoExtractor): _ENDPOINTS = { 'course': 'course?url={}&brand=00002560-0000-3fa9-0000-1d61000035f3', - 'episode': 'brand/00002560-0000-3fa9-0000-1d61000035f3/episode?url={}' + 'episode': 'brand/00002560-0000-3fa9-0000-1d61000035f3/episode?url={}', } def _call_api(self, ep, item_id, webpage): @@ -46,7 +46,7 @@ class ITProTVIE(ITProTVBaseIE): 'availability': 'needs_auth', 'chapter': 'ITProTV 101', 'chapter_number': 1, - 'chapter_id': '5dbb3de426b46c0010b5d1b6' + 'chapter_id': '5dbb3de426b46c0010b5d1b6', }, }, { @@ -64,7 +64,7 @@ class ITProTVIE(ITProTVBaseIE): 'availability': 'needs_auth', 'chapter': 'Job Development', 'chapter_number': 2, - 'chapter_id': '5f7c78d424330c000edf04d9' + 'chapter_id': '5f7c78d424330c000edf04d9', }, }] @@ -95,7 +95,7 @@ class ITProTVIE(ITProTVBaseIE): 'chapter_number': chapter_number, 'chapter_id': str_or_none(chapter.get('id')), 'subtitles': { - 'en': [{'ext': 'vtt', 'data': episode['enCaptionData']}] + 'en': [{'ext': 'vtt', 'data': episode['enCaptionData']}], } if episode.get('enCaptionData') else None, } @@ -110,16 +110,16 @@ class ITProTVCourseIE(ITProTVBaseIE): 'description': 'md5:b175c2c3061ce35a4dd33865b2c1da4e', 'title': 'ITProTV 101', }, - 'playlist_count': 6 + 'playlist_count': 6, }, { 'url': 'https://app.itpro.tv/course/beyond-tech', 'info_dict': { 'id': 'beyond-tech', 
'description': 'md5:44cd99855e7f81a15ce1269bd0621fed', - 'title': 'Beyond Tech' + 'title': 'Beyond Tech', }, - 'playlist_count': 15 + 'playlist_count': 15, }, ] diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 55c4165..89e6f18 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -2,7 +2,6 @@ import json from .brightcove import BrightcoveNewIE from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( JSON_LD_RE, ExtractorError, @@ -34,7 +33,7 @@ class ITVIE(InfoExtractor): 'series': 'Plebs', 'season_number': 1, 'episode_number': 1, - 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002' + 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002', }, 'params': { # m3u8 download @@ -50,7 +49,7 @@ class ITVIE(InfoExtractor): 'series': 'The Jonathan Ross Show', 'episode_number': 8, 'season_number': 17, - 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002' + 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002', }, 'params': { # m3u8 download @@ -83,7 +82,7 @@ class ITVIE(InfoExtractor): 'user': { 'itvUserId': '', 'entitlements': [], - 'token': '' + 'token': '', }, 'device': { 'manufacturer': 'Safari', @@ -91,20 +90,20 @@ class ITVIE(InfoExtractor): 'os': { 'name': 'Windows NT', 'version': '6.1', - 'type': 'desktop' - } + 'type': 'desktop', + }, }, 'client': { 'version': '4.1', - 'id': 'browser' + 'id': 'browser', }, 'variantAvailability': { 'featureset': { 'min': featureset, - 'max': featureset + 'max': featureset, }, - 'platformTag': platform_tag - } + 'platformTag': platform_tag, + }, }).encode(), headers=headers, fatal=fatal) def _get_subtitles(self, video_id, variants, ios_playlist_url, headers, *args, **kwargs): @@ -136,7 +135,7 @@ class ITVIE(InfoExtractor): params = extract_attributes(self._search_regex( r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params')) variants = self._parse_json( - try_get(params, lambda x: x['data-video-variants'], 
compat_str) or '{}', + try_get(params, lambda x: x['data-video-variants'], str) or '{}', video_id, fatal=False) # Prefer last matching featureset # See: https://github.com/yt-dlp/yt-dlp/issues/986 @@ -185,7 +184,7 @@ class ITVIE(InfoExtractor): break thumbnails = [] - thumbnail_url = try_get(params, lambda x: x['data-video-posterframe'], compat_str) + thumbnail_url = try_get(params, lambda x: x['data-video-posterframe'], str) if thumbnail_url: thumbnails.extend([{ 'url': thumbnail_url.format(width=1920, height=1080, quality=100, blur=0, bg='false'), @@ -193,7 +192,7 @@ class ITVIE(InfoExtractor): 'height': 1080, }, { 'url': urljoin(base_url(thumbnail_url), url_basename(thumbnail_url)), - 'preference': -2 + 'preference': -2, }]) thumbnail_url = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None) @@ -210,7 +209,7 @@ class ITVIE(InfoExtractor): 'subtitles': self.extract_subtitles(video_id, variants, ios_playlist_url, headers), 'duration': parse_duration(video_data.get('Duration')), 'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)), - 'thumbnails': thumbnails + 'thumbnails': thumbnails, }, info) @@ -227,9 +226,9 @@ class ITVBTCCIE(InfoExtractor): 'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike', 'info_dict': { 'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike', - 'title': 'md5:6ef054dd9f069330db3dcc66cb772d32' + 'title': 'md5:6ef054dd9f069330db3dcc66cb772d32', }, - 'playlist_count': 4 + 'playlist_count': 4, }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' @@ -254,7 +253,7 @@ class ITVBTCCIE(InfoExtractor): # ITV does not like some GB IP ranges, so here are some # IP blocks it accepts 'geo_ip_blocks': [ - '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21' + '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21', ], 'referrer': url, }), diff --git 
a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py index fa5ceec..57c276a 100644 --- a/yt_dlp/extractor/ivi.py +++ b/yt_dlp/extractor/ivi.py @@ -82,10 +82,10 @@ class IviIE(InfoExtractor): 'params': [ video_id, { 'site': 's%d', - 'referrer': 'http://www.ivi.ru/watch/%s' % video_id, - 'contentid': video_id - } - ] + 'referrer': f'http://www.ivi.ru/watch/{video_id}', + 'contentid': video_id, + }, + ], }) for site in (353, 183): @@ -98,7 +98,7 @@ class IviIE(InfoExtractor): self._LIGHT_URL, video_id, 'Downloading timestamp JSON', data=json.dumps({ 'method': 'da.timestamp.get', - 'params': [] + 'params': [], }).encode(), fatal=False) or {}).get('result') if not timestamp: continue @@ -158,7 +158,7 @@ class IviIE(InfoExtractor): compilation = result.get('compilation') episode = title if compilation else None - title = '%s - %s' % (compilation, title) if compilation is not None else title + title = f'{compilation} - {title}' if compilation is not None else title thumbnails = [{ 'url': preview['url'], @@ -219,9 +219,9 @@ class IviCompilationIE(InfoExtractor): def _extract_entries(self, html, compilation_id): return [ self.url_result( - 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key()) + f'http://www.ivi.ru/watch/{compilation_id}/{serie}', IviIE.ie_key()) for serie in re.findall( - r'<a\b[^>]+\bhref=["\']/watch/%s/(\d+)["\']' % compilation_id, html)] + rf'<a\b[^>]+\bhref=["\']/watch/{compilation_id}/(\d+)["\']', html)] def _real_extract(self, url): mobj = self._match_valid_url(url) @@ -230,8 +230,8 @@ class IviCompilationIE(InfoExtractor): if season_id is not None: # Season link season_page = self._download_webpage( - url, compilation_id, 'Downloading season %s web page' % season_id) - playlist_id = '%s/season%s' % (compilation_id, season_id) + url, compilation_id, f'Downloading season {season_id} web page') + playlist_id = f'{compilation_id}/season{season_id}' playlist_title = self._html_search_meta('title', season_page, 'title') entries = 
self._extract_entries(season_page, compilation_id) else: # Compilation link @@ -239,15 +239,15 @@ class IviCompilationIE(InfoExtractor): playlist_id = compilation_id playlist_title = self._html_search_meta('title', compilation_page, 'title') seasons = re.findall( - r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page) + rf'<a href="/watch/{compilation_id}/season(\d+)', compilation_page) if not seasons: # No seasons in this compilation entries = self._extract_entries(compilation_page, compilation_id) else: entries = [] for season_id in seasons: season_page = self._download_webpage( - 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id), - compilation_id, 'Downloading season %s web page' % season_id) + f'http://www.ivi.ru/watch/{compilation_id}/season{season_id}', + compilation_id, f'Downloading season {season_id} web page') entries.extend(self._extract_entries(season_page, compilation_id)) return self.playlist_result(entries, playlist_id, playlist_title) diff --git a/yt_dlp/extractor/ivideon.py b/yt_dlp/extractor/ivideon.py index 7d1e554..eb860c7 100644 --- a/yt_dlp/extractor/ivideon.py +++ b/yt_dlp/extractor/ivideon.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_urlparse, -) from ..utils import qualities @@ -21,7 +19,7 @@ class IvideonIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'https://www.ivideon.com/tv/camera/100-c4ee4cb9ede885cf62dfbe93d7b53783/589824/?lang=ru', 'only_matching': True, @@ -36,8 +34,8 @@ class IvideonIE(InfoExtractor): mobj = self._match_valid_url(url) server_id, camera_id = mobj.group('id'), mobj.group('camera_id') camera_name, description = None, None - camera_url = compat_urlparse.urljoin( - url, '/tv/camera/%s/%s/' % (server_id, camera_id)) + camera_url = urllib.parse.urljoin( + url, f'/tv/camera/{server_id}/{camera_id}/') webpage = self._download_webpage(camera_url, server_id, 
fatal=False) if webpage: @@ -57,12 +55,12 @@ class IvideonIE(InfoExtractor): quality = qualities(self._QUALITIES) formats = [{ - 'url': 'https://streaming.ivideon.com/flv/live?%s' % compat_urllib_parse_urlencode({ + 'url': 'https://streaming.ivideon.com/flv/live?{}'.format(urllib.parse.urlencode({ 'server': server_id, 'camera': camera_id, 'sessionId': 'demo', 'q': quality(format_id), - }), + })), 'format_id': format_id, 'ext': 'flv', 'quality': quality(format_id), diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py index a11f3f1..5b5c367 100644 --- a/yt_dlp/extractor/iwara.py +++ b/yt_dlp/extractor/iwara.py @@ -2,7 +2,6 @@ import functools import hashlib import json import time -import urllib.error import urllib.parse from .common import InfoExtractor @@ -41,7 +40,7 @@ class IwaraBaseIE(InfoExtractor): 'https://api.iwara.tv/user/login', None, note='Logging in', headers={'Content-Type': 'application/json'}, data=json.dumps({ 'email': username, - 'password': password + 'password': password, }).encode(), expected_status=lambda x: True) user_token = traverse_obj(response, ('token', {str})) if not user_token: @@ -65,7 +64,7 @@ class IwaraBaseIE(InfoExtractor): 'https://api.iwara.tv/user/token', None, note='Fetching media token', data=b'', headers={ 'Authorization': f'Bearer {IwaraBaseIE._USERTOKEN}', - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', })['accessToken'] return {'Authorization': f'Bearer {IwaraBaseIE._MEDIATOKEN}'} @@ -107,7 +106,7 @@ class IwaraIE(IwaraBaseIE): 'uploader': 'Lyu ya', 'uploader_id': 'user792540', 'tags': [ - 'uncategorized' + 'uncategorized', ], 'like_count': int, 'view_count': int, @@ -129,7 +128,7 @@ class IwaraIE(IwaraBaseIE): 'uploader': 'Fe_Kurosabi', 'uploader_id': 'fekurosabi', 'tags': [ - 'pee' + 'pee', ], 'like_count': int, 'view_count': int, diff --git a/yt_dlp/extractor/ixigua.py b/yt_dlp/extractor/ixigua.py index 1f086d2..2868c2f 100644 --- a/yt_dlp/extractor/ixigua.py +++ 
b/yt_dlp/extractor/ixigua.py @@ -29,7 +29,7 @@ class IxiguaIE(InfoExtractor): 'thumbnail': r're:^https?://.+\.(avif|webp)', 'timestamp': 1629088414, 'duration': 1030, - } + }, }] def _get_json_data(self, webpage, video_id): diff --git a/yt_dlp/extractor/izlesene.py b/yt_dlp/extractor/izlesene.py index 5cdf870..cf2a269 100644 --- a/yt_dlp/extractor/izlesene.py +++ b/yt_dlp/extractor/izlesene.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, -) from ..utils import ( determine_ext, float_or_none, @@ -33,7 +31,7 @@ class IzleseneIE(InfoExtractor): 'upload_date': '20140702', 'duration': 95.395, 'age_limit': 0, - } + }, }, { 'url': 'http://www.izlesene.com/video/tarkan-dortmund-2006-konseri/17997', @@ -48,14 +46,14 @@ class IzleseneIE(InfoExtractor): 'upload_date': '20061112', 'duration': 253.666, 'age_limit': 0, - } + }, }, ] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('http://www.izlesene.com/video/%s' % video_id, video_id) + webpage = self._download_webpage(f'http://www.izlesene.com/video/{video_id}', video_id) video = self._parse_json( self._search_regex( @@ -67,14 +65,14 @@ class IzleseneIE(InfoExtractor): formats = [] for stream in video['media']['level']: source_url = stream.get('source') - if not source_url or not isinstance(source_url, compat_str): + if not source_url or not isinstance(source_url, str): continue ext = determine_ext(url, 'mp4') quality = stream.get('value') height = int_or_none(quality) formats.append({ - 'format_id': '%sp' % quality if quality else 'sd', - 'url': compat_urllib_parse_unquote(source_url), + 'format_id': f'{quality}p' if quality else 'sd', + 'url': urllib.parse.unquote(source_url), 'ext': ext, 'height': height, }) diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py index 8557a81..16540c4 100644 --- a/yt_dlp/extractor/jamendo.py +++ b/yt_dlp/extractor/jamendo.py @@ -2,7 
+2,6 @@ import hashlib import random from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( clean_html, int_or_none, @@ -40,20 +39,20 @@ class JamendoIE(InfoExtractor): 'like_count': int, 'average_rating': int, 'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'], - } + }, }, { 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock', 'only_matching': True, }] def _call_api(self, resource, resource_id, fatal=True): - path = '/api/%ss' % resource - rand = compat_str(random.random()) + path = f'/api/{resource}s' + rand = str(random.random()) return self._download_json( 'https://www.jamendo.com' + path, resource_id, fatal=fatal, query={ 'id[]': resource_id, }, headers={ - 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand) + 'X-Jam-Call': f'${hashlib.sha1((path + rand).encode()).hexdigest()}*{rand}~', })[0] def _real_extract(self, url): @@ -72,12 +71,11 @@ class JamendoIE(InfoExtractor): # if artist_name: # title = '%s - %s' % (artist_name, title) # album = get_model('album') - artist = self._call_api("artist", track.get('artistId'), fatal=False) - album = self._call_api("album", track.get('albumId'), fatal=False) + artist = self._call_api('artist', track.get('artistId'), fatal=False) + album = self._call_api('album', track.get('albumId'), fatal=False) formats = [{ - 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' - % (sub_domain, track_id, format_id), + 'url': f'https://{sub_domain}.jamendo.com/?trackid={track_id}&format={format_id}&from=app-97dab294', 'format_id': format_id, 'ext': ext, 'quality': quality, @@ -111,7 +109,7 @@ class JamendoIE(InfoExtractor): tags.append(tag_name) stats = track.get('stats') or {} - license = track.get('licenseCC') or [] + video_license = track.get('licenseCC') or [] return { 'id': track_id, @@ -124,7 +122,7 @@ class JamendoIE(InfoExtractor): 'track': track_name, 'album': album.get('name'), 'formats': formats, - 'license': 
'-'.join(license) if license else None, + 'license': '-'.join(video_license) if video_license else None, 'timestamp': int_or_none(track.get('dateCreated')), 'view_count': int_or_none(stats.get('listenedAll')), 'like_count': int_or_none(stats.get('favorited')), @@ -160,7 +158,7 @@ class JamendoAlbumIE(JamendoIE): # XXX: Do not subclass from concrete IE 'average_rating': 4, 'tags': ['rock', 'drums', 'bass', 'world', 'punk', 'neutral'], 'like_count': int, - } + }, }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', 'info_dict': { @@ -179,11 +177,11 @@ class JamendoAlbumIE(JamendoIE): # XXX: Do not subclass from concrete IE 'average_rating': 4, 'license': 'by', 'like_count': int, - } + }, }], 'params': { - 'playlistend': 2 - } + 'playlistend': 2, + }, }] def _real_extract(self, url): @@ -196,7 +194,7 @@ class JamendoAlbumIE(JamendoIE): # XXX: Do not subclass from concrete IE track_id = track.get('id') if not track_id: continue - track_id = compat_str(track_id) + track_id = str(track_id) entries.append({ '_type': 'url_transparent', 'url': 'https://www.jamendo.com/track/' + track_id, @@ -207,4 +205,4 @@ class JamendoAlbumIE(JamendoIE): # XXX: Do not subclass from concrete IE return self.playlist_result( entries, album_id, album_name, - clean_html(try_get(album, lambda x: x['description']['en'], compat_str))) + clean_html(try_get(album, lambda x: x['description']['en'], str))) diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py index 19d2b92..2ef091a 100644 --- a/yt_dlp/extractor/japandiet.py +++ b/yt_dlp/extractor/japandiet.py @@ -41,7 +41,7 @@ def _parse_japanese_duration(text): mobj = re.search(r'(?:(\d+)日間?)?(?:(\d+)時間?)?(?:(\d+)分)?(?:(\d+)秒)?', re.sub(r'[\s\u3000]+', '', text or '')) if not mobj: return - days, hours, mins, secs = [int_or_none(x, default=0) for x in mobj.groups()] + days, hours, mins, secs = (int_or_none(x, default=0) for x in mobj.groups()) return secs + mins * 60 + hours * 60 * 60 + days * 24 * 60 * 60 @@ -142,10 +142,10 @@ class 
ShugiinItvVodIE(ShugiinItvBaseIE): 'title': 'ウクライナ大統領国会演説(オンライン)', 'release_date': '20220323', 'chapters': 'count:4', - } + }, }, { 'url': 'https://www.shugiintv.go.jp/en/index.php?ex=VL&media_type=&deli_id=53846', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -232,7 +232,7 @@ class SangiinIE(InfoExtractor): 'is_live': True, }, 'skip': 'this live is turned into archive after it ends', - }, ] + }] def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/jiocinema.py b/yt_dlp/extractor/jiocinema.py index e7186d7..30d98ba 100644 --- a/yt_dlp/extractor/jiocinema.py +++ b/yt_dlp/extractor/jiocinema.py @@ -157,11 +157,11 @@ class JioCinemaBaseIE(InfoExtractor): 'info': { 'platform': {'name': 'iPhone OS' if is_iphone else 'Android'}, 'androidId': self._DEVICE_ID, - 'type': 'iOS' if is_iphone else 'Android' - } + 'type': 'iOS' if is_iphone else 'Android', + }, }, **initial_data, - 'otp': self._get_tfa_info('the one-time password sent to your phone') + 'otp': self._get_tfa_info('the one-time password sent to your phone'), }, 'Submitting OTP') if traverse_obj(response, 'code') == 1043: raise ExtractorError('Wrong OTP', expected=True) @@ -276,12 +276,12 @@ class JioCinemaIE(JioCinemaBaseIE): 'aesSupport': 'yes', 'fairPlayDrmSupport': 'none', 'playreadyDrmSupport': 'none', - 'widevineDRMSupport': 'none' + 'widevineDRMSupport': 'none', }, 'frameRateCapability': [{ 'frameRateSupport': '30fps', - 'videoQuality': '1440p' - }] + 'videoQuality': '1440p', + }], }, 'continueWatchingRequired': False, 'dolby': False, @@ -293,7 +293,7 @@ class JioCinemaIE(JioCinemaBaseIE): 'multiAudioRequired': True, 'osVersion': '10', 'parentalPinValid': True, - 'x-apisignatures': self._API_SIGNATURES + 'x-apisignatures': self._API_SIGNATURES, }) status_code = traverse_obj(playback, ('code', {int})) @@ -364,20 +364,25 @@ class JioCinemaSeriesIE(JioCinemaBaseIE): 'title': 'naagin', }, 'playlist_mincount': 120, + }, { + 'url': 
'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820', + 'info_dict': { + 'id': '3499820', + 'title': 'mtv-splitsvilla-x5', + }, + 'playlist_mincount': 310, }] def _entries(self, series_id): - seasons = self._download_json( - f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id, - 'Downloading series metadata JSON', query={ - 'sort': 'season:asc', - 'id': series_id, - 'responseType': 'common', - }) + seasons = traverse_obj(self._download_json( + f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id, + 'Downloading series metadata JSON', query={'responseType': 'common'}), ( + 'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter', + 'trayTabs', lambda _, v: v['id'])) - for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1): + for season_num, season in enumerate(seasons, start=1): season_id = season['id'] - label = season.get('season') or season_num + label = season.get('label') or season_num for page_num in itertools.count(1): episodes = traverse_obj(self._download_json( f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode', diff --git a/yt_dlp/extractor/jiosaavn.py b/yt_dlp/extractor/jiosaavn.py index 35fb3fd..542e41b 100644 --- a/yt_dlp/extractor/jiosaavn.py +++ b/yt_dlp/extractor/jiosaavn.py @@ -27,7 +27,7 @@ class JioSaavnBaseIE(InfoExtractor): if invalid_bitrates := set(requested_bitrates) - self._VALID_BITRATES: raise ValueError( f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. 
' - + f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}') + f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}') return requested_bitrates def _extract_formats(self, song_data): diff --git a/yt_dlp/extractor/joj.py b/yt_dlp/extractor/joj.py index ea46042..0c8e999 100644 --- a/yt_dlp/extractor/joj.py +++ b/yt_dlp/extractor/joj.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( format_field, int_or_none, @@ -25,7 +24,7 @@ class JojIE(InfoExtractor): 'title': 'NOVÉ BÝVANIE', 'thumbnail': r're:^https?://.*?$', 'duration': 3118, - } + }, }, { 'url': 'https://media.joj.sk/embed/CSM0Na0l0p1', 'info_dict': { @@ -35,7 +34,7 @@ class JojIE(InfoExtractor): 'title': 'Extrémne rodiny 2 - POKRAČOVANIE (2012/04/09 21:30:00)', 'duration': 3937, 'thumbnail': r're:^https?://.*?$', - } + }, }, { 'url': 'https://media.joj.sk/embed/9i1cxv', 'only_matching': True, @@ -51,7 +50,7 @@ class JojIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://media.joj.sk/embed/%s' % video_id, video_id) + f'https://media.joj.sk/embed/{video_id}', video_id) title = (self._search_json(r'videoTitle\s*:', webpage, 'title', video_id, contains_pattern=r'["\'].+["\']', default=None) @@ -66,7 +65,7 @@ class JojIE(InfoExtractor): formats = [] for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []: - if isinstance(format_url, compat_str): + if isinstance(format_url, str): height = self._search_regex( r'(\d+)[pP]|(pal)\.', format_url, 'height', default=None) if height == 'pal': @@ -78,7 +77,7 @@ class JojIE(InfoExtractor): }) if not formats: playlist = self._download_xml( - 'https://media.joj.sk/services/Video.php?clip=%s' % video_id, + f'https://media.joj.sk/services/Video.php?clip={video_id}', video_id) for file_el in playlist.findall('./files/file'): path = file_el.get('path') @@ -86,8 +85,8 @@ class JojIE(InfoExtractor): continue format_id = 
file_el.get('id') or file_el.get('label') formats.append({ - 'url': 'http://n16.joj.sk/storage/%s' % path.replace( - 'dat/', '', 1), + 'url': 'http://n16.joj.sk/storage/{}'.format(path.replace( + 'dat/', '', 1)), 'format_id': format_id, 'height': int_or_none(self._search_regex( r'(\d+)[pP]', format_id or path, 'height', diff --git a/yt_dlp/extractor/jove.py b/yt_dlp/extractor/jove.py index 8069fea..6b37ccf 100644 --- a/yt_dlp/extractor/jove.py +++ b/yt_dlp/extractor/jove.py @@ -16,7 +16,7 @@ class JoveIE(InfoExtractor): 'description': 'md5:015dd4509649c0908bc27f049e0262c6', 'thumbnail': r're:^https?://.*\.png$', 'upload_date': '20110523', - } + }, }, { 'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation', @@ -28,7 +28,7 @@ class JoveIE(InfoExtractor): 'description': 'md5:35ff029261900583970c4023b70f1dc9', 'thumbnail': r're:^https?://.*\.png$', 'upload_date': '20140802', - } + }, }, ] diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py index bc47aa6..7d5a931 100644 --- a/yt_dlp/extractor/jwplatform.py +++ b/yt_dlp/extractor/jwplatform.py @@ -18,7 +18,7 @@ class JWPlatformIE(InfoExtractor): 'timestamp': 1227796140, 'duration': 32.0, 'thumbnail': 'https://cdn.jwplayer.com/v2/media/nPripu9l/poster.jpg?width=720', - } + }, }, { 'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js', 'only_matching': True, @@ -72,7 +72,7 @@ class JWPlatformIE(InfoExtractor): # <input value=URL> is used by hyland.com # if we find <iframe>, dont look for <input> ret = re.findall( - r'<%s[^>]+?%s=\\?["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key), + rf'<{tag}[^>]+?{key}=\\?["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{{8}})', webpage) if ret: return ret diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py index 563aa2d..6f3459d 100644 --- a/yt_dlp/extractor/kakao.py +++ b/yt_dlp/extractor/kakao.py @@ 
-33,7 +33,7 @@ class KakaoIE(InfoExtractor): 'view_count': int, 'duration': 1503, 'comment_count': int, - } + }, }, { 'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180', 'md5': 'a8917742069a4dd442516b86e7d66529', @@ -52,7 +52,7 @@ class KakaoIE(InfoExtractor): 'view_count': int, 'duration': 184, 'comment_count': int, - } + }, }, { # geo restricted 'url': 'https://tv.kakao.com/channel/3643855/cliplink/412069491', @@ -76,7 +76,7 @@ class KakaoIE(InfoExtractor): 'description', 'channelId', 'createTime', 'duration', 'playCount', 'likeCount', 'commentCount', 'tagList', 'channel', 'name', 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault', - 'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label']) + 'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label']), } api_json = self._download_json( @@ -99,7 +99,7 @@ class KakaoIE(InfoExtractor): try: fmt_url_json = self._download_json( cdn_api_base, video_id, query=query, - note='Downloading video URL for profile %s' % profile_name) + note=f'Downloading video URL for profile {profile_name}') except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 403: resp = self._parse_json(e.cause.response.read().decode(), video_id) @@ -126,7 +126,7 @@ class KakaoIE(InfoExtractor): thumbs.append({ 'url': thumb.get('thumbnailUrl'), 'id': str(thumb.get('timeInSec')), - 'preference': -1 if thumb.get('isDefault') else 0 + 'preference': -1 if thumb.get('isDefault') else 0, }) top_thumbnail = clip.get('thumbnailUrl') if top_thumbnail: diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index 4752d5a..e5737b1 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -1,12 +1,10 @@ import base64 +import contextlib import json import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) from ..utils import ( ExtractorError, clean_html, @@ -57,7 +55,7 @@ class 
KalturaIE(InfoExtractor): 'thumbnail': 're:^https?://.*/thumbnail/.*', 'timestamp': int, }, - 'skip': 'The access to this service is forbidden since the specified partner is blocked' + 'skip': 'The access to this service is forbidden since the specified partner is blocked', }, { 'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4', @@ -124,14 +122,14 @@ class KalturaIE(InfoExtractor): 'view_count': int, 'upload_date': '20140815', 'thumbnail': 'http://cfvod.kaltura.com/p/691292/sp/69129200/thumbnail/entry_id/0_c076mna6/version/100022', - } + }, }, { # html5lib playlist URL using kwidget player 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.89/mwEmbedFrame.php/p/2019031/uiconf_id/40436601?wid=1_4j3m32cv&iframeembed=true&playerId=kaltura_player_&flashvars[playlistAPI.kpl0Id]=1_jovey5nu&flashvars[ks]=&&flashvars[imageDefaultDuration]=30&flashvars[localizationCode]=en&flashvars[leadWithHTML5]=true&flashvars[forceMobileHTML5]=true&flashvars[nextPrevBtn.plugin]=true&flashvars[hotspots.plugin]=true&flashvars[sideBarContainer.plugin]=true&flashvars[sideBarContainer.position]=left&flashvars[sideBarContainer.clickToClose]=true&flashvars[chapters.plugin]=true&flashvars[chapters.layout]=vertical&flashvars[chapters.thumbnailRotator]=false&flashvars[streamSelector.plugin]=true&flashvars[EmbedPlayer.SpinnerTarget]=videoHolder&flashvars[dualScreen.plugin]=true&flashvars[playlistAPI.playlistUrl]=https://canvasgatechtest.kaf.kaltura.com/playlist/details/{playlistAPI.kpl0Id}/categoryid/126428551', 'info_dict': { 'id': '1_jovey5nu', - 'title': '00-00 Introduction' + 'title': '00-00 Introduction', }, 'playlist': [ { @@ -145,7 +143,7 @@ class KalturaIE(InfoExtractor): 'timestamp': 1533154447, 'upload_date': '20180801', 'uploader_id': 'djoyner3', - } + }, }, { 'info_dict': { 'id': '1_jfb7mdpn', @@ -157,7 +155,7 @@ class KalturaIE(InfoExtractor): 'timestamp': 1533154489, 'upload_date': '20180801', 'uploader_id': 
'djoyner3', - } + }, }, { 'info_dict': { 'id': '1_8xflxdp7', @@ -169,7 +167,7 @@ class KalturaIE(InfoExtractor): 'timestamp': 1533154512, 'upload_date': '20180801', 'uploader_id': 'djoyner3', - } + }, }, { 'info_dict': { 'id': '1_3hqew8kn', @@ -181,10 +179,10 @@ class KalturaIE(InfoExtractor): 'timestamp': 1533154536, 'upload_date': '20180801', 'uploader_id': 'djoyner3', - } - } - ] - } + }, + }, + ], + }, ] @classmethod @@ -192,14 +190,14 @@ class KalturaIE(InfoExtractor): # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site finditer = ( list(re.finditer( - r"""(?xs) + r'''(?xs) kWidget\.(?:thumb)?[Ee]mbed\( \{.*? (?P<q1>['"])wid(?P=q1)\s*:\s* (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*? (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) - """, webpage)) + ''', webpage)) or list(re.finditer( r'''(?xs) (?P<q1>["']) @@ -230,34 +228,34 @@ class KalturaIE(InfoExtractor): for k, v in embed_info.items(): if v: embed_info[k] = v.strip() - embed_url = 'kaltura:%(partner_id)s:%(id)s' % embed_info + embed_url = 'kaltura:{partner_id}:{id}'.format(**embed_info) escaped_pid = re.escape(embed_info['partner_id']) service_mobj = re.search( - r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid), + rf'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/{escaped_pid}/sp/{escaped_pid}00/embedIframeJs', webpage) if service_mobj: embed_url = smuggle_url(embed_url, {'service_url': service_mobj.group('id')}) urls.append(embed_url) return urls - def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): + def _kaltura_api_call(self, video_id, actions, service_url=None, **kwargs): params = actions[0] - params.update({i: a for i, a in enumerate(actions[1:], start=1)}) + params.update(dict(enumerate(actions[1:], start=1))) data = self._download_json( (service_url or self._SERVICE_URL) + self._SERVICE_BASE, 
- video_id, data=json.dumps(params).encode('utf-8'), + video_id, data=json.dumps(params).encode(), headers={ 'Content-Type': 'application/json', 'Accept-Encoding': 'gzip, deflate, br', - }, *args, **kwargs) + }, **kwargs) for idx, status in enumerate(data): if not isinstance(status, dict): continue if status.get('objectType') == 'KalturaAPIException': raise ExtractorError( - '%s said: %s (%d)' % (self.IE_NAME, status['message'], idx)) + '{} said: {} ({})'.format(self.IE_NAME, status['message'], idx)) data[1] = traverse_obj(data, (1, 'objects', 0)) @@ -342,7 +340,7 @@ class KalturaIE(InfoExtractor): 'apiVersion': '3.1', 'clientTag': 'kwidget:v2.89', 'ignoreNull': 1, - 'ks': '{1:result:ks}' + 'ks': '{1:result:ks}', }, # info { @@ -397,10 +395,10 @@ class KalturaIE(InfoExtractor): raise ExtractorError('Invalid URL', expected=True) params = {} if query: - params = compat_parse_qs(query) + params = urllib.parse.parse_qs(query) if path: splitted_path = path.split('/') - params.update(dict((zip(splitted_path[::2], [[v] for v in splitted_path[1::2]])))) + params.update(dict(zip(splitted_path[::2], [[v] for v in splitted_path[1::2]]))) if 'wid' in params: partner_id = remove_start(params['wid'][0], '_') elif 'p' in params: @@ -423,14 +421,11 @@ class KalturaIE(InfoExtractor): # Unfortunately, data returned in kalturaIframePackageData lacks # captions so we will try requesting the complete data using # regular approach since we now know the entry_id - try: + # Even if this fails we already have everything extracted + # apart from captions and can process at least with this + with contextlib.suppress(ExtractorError): _, info, flavor_assets, captions = self._get_video_info( entry_id, partner_id, player_type=player_type) - except ExtractorError: - # Regular scenario failed but we already have everything - # extracted apart from captions and can process at least - # with this - pass elif 'uiconf_id' in params and 'flashvars[playlistAPI.kpl0Id]' in params: playlist_id = 
params['flashvars[playlistAPI.kpl0Id]'][0] webpage = self._download_webpage(url, playlist_id) @@ -451,16 +446,16 @@ class KalturaIE(InfoExtractor): source_url = smuggled_data.get('source_url') if source_url: referrer = base64.b64encode( - '://'.join(compat_urlparse.urlparse(source_url)[:2]) - .encode('utf-8')).decode('utf-8') + '://'.join(urllib.parse.urlparse(source_url)[:2]) + .encode()).decode('utf-8') else: referrer = None def sign_url(unsigned_url): if ks: - unsigned_url += '/ks/%s' % ks + unsigned_url += f'/ks/{ks}' if referrer: - unsigned_url += '?referrer=%s' % referrer + unsigned_url += f'?referrer={referrer}' return unsigned_url data_url = info['dataUrl'] @@ -487,8 +482,8 @@ class KalturaIE(InfoExtractor): else: f['fileExt'] = 'mp4' video_url = sign_url( - '%s/flavorId/%s' % (data_url, f['id'])) - format_id = '%(fileExt)s-%(bitrate)s' % f + '{}/flavorId/{}'.format(data_url, f['id'])) + format_id = '{fileExt}-{bitrate}'.format(**f) # Source format may not be available (e.g. kaltura:513551:1_66x4rg7o) if f.get('isOriginal') is True and not self._is_valid_url( video_url, entry_id, format_id): @@ -527,7 +522,7 @@ class KalturaIE(InfoExtractor): continue caption_format = int_or_none(caption.get('format')) subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({ - 'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']), + 'url': '{}/api_v3/service/caption_captionasset/action/serve/captionAssetId/{}'.format(self._SERVICE_URL, caption['id']), 'ext': caption.get('fileExt') or self._CAPTION_TYPES.get(caption_format) or 'ttml', }) diff --git a/yt_dlp/extractor/kankanews.py b/yt_dlp/extractor/kankanews.py index 3d74c74..a39ff78 100644 --- a/yt_dlp/extractor/kankanews.py +++ b/yt_dlp/extractor/kankanews.py @@ -19,7 +19,7 @@ class KankaNewsIE(InfoExtractor): 'ext': 'mp4', 'title': '视频|第23个中国记者节,我们在进博切蛋糕', 'thumbnail': r're:^https?://.*\.jpg*', - } + }, }] def 
_real_extract(self, url): diff --git a/yt_dlp/extractor/karaoketv.py b/yt_dlp/extractor/karaoketv.py index 381dc00..8168b1a 100644 --- a/yt_dlp/extractor/karaoketv.py +++ b/yt_dlp/extractor/karaoketv.py @@ -13,7 +13,7 @@ class KaraoketvIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, } def _real_extract(self, url): @@ -45,7 +45,7 @@ class KaraoketvIE(InfoExtractor): servers = ('wowzail.video-cdn.com:80/vodcdn', ) formats = [{ - 'url': 'rtmp://%s' % server if not server.startswith('rtmp') else server, + 'url': f'rtmp://{server}' if not server.startswith('rtmp') else server, 'play_path': play_path, 'app': 'vodcdn', 'page_url': video_cdn_url, diff --git a/yt_dlp/extractor/kelbyone.py b/yt_dlp/extractor/kelbyone.py index bba527e..0ac0c5e 100644 --- a/yt_dlp/extractor/kelbyone.py +++ b/yt_dlp/extractor/kelbyone.py @@ -24,7 +24,7 @@ class KelbyOneIE(InfoExtractor): 'duration': 90, 'upload_date': '20201001', }, - }] + }], }] def _entries(self, playlist): diff --git a/yt_dlp/extractor/khanacademy.py b/yt_dlp/extractor/khanacademy.py index 5333036..3f03f9e 100644 --- a/yt_dlp/extractor/khanacademy.py +++ b/yt_dlp/extractor/khanacademy.py @@ -3,43 +3,52 @@ import json from .common import InfoExtractor from ..utils import ( int_or_none, + make_archive_id, parse_iso8601, - try_get, + str_or_none, + traverse_obj, + url_or_none, + urljoin, ) class KhanAcademyBaseIE(InfoExtractor): _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)' + _PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70' + def _parse_video(self, video): return { '_type': 'url_transparent', 'url': video['youtubeId'], - 'id': video.get('slug'), - 'title': video.get('title'), - 'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'), - 'duration': int_or_none(video.get('duration')), - 'description': video.get('description'), + 'id': video['youtubeId'], 'ie_key': 'Youtube', + **traverse_obj(video, { + 'display_id': ('id', 
{str_or_none}), + 'title': ('translatedTitle', {str}), + 'thumbnail': ('thumbnailUrls', ..., 'url', {url_or_none}), + 'duration': ('duration', {int_or_none}), + 'description': ('description', {str}), + }, get_all=False), } def _real_extract(self, url): display_id = self._match_id(url) content = self._download_json( - 'https://www.khanacademy.org/api/internal/graphql/FetchContentData', - display_id, query={ + 'https://www.khanacademy.org/api/internal/graphql/ContentForPath', display_id, + query={ 'fastly_cacheable': 'persist_until_publish', - 'hash': '4134764944', - 'lang': 'en', + 'pcv': self._PUBLISHED_CONTENT_VERSION, + 'hash': '1242644265', 'variables': json.dumps({ 'path': display_id, - 'queryParams': 'lang=en', - 'isModal': False, - 'followRedirects': True, 'countryCode': 'US', + 'kaLocale': 'en', + 'clientPublishedContentVersion': self._PUBLISHED_CONTENT_VERSION, }), - })['data']['contentJson'] - return self._parse_component_props(self._parse_json(content, display_id)['componentProps']) + 'lang': 'en', + })['data']['contentRoute']['listedPathData'] + return self._parse_component_props(content, display_id) class KhanAcademyIE(KhanAcademyBaseIE): @@ -47,64 +56,98 @@ class KhanAcademyIE(KhanAcademyBaseIE): _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/') _TEST = { 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad', - 'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0', + 'md5': '1d5c2e70fa6aa29c38eca419f12515ce', 'info_dict': { 'id': 'FlIG3TvQCBQ', 'ext': 'mp4', 'title': 'The one-time pad', 'description': 'The perfect cipher', + 'display_id': '716378217', 'duration': 176, - 'uploader': 'Brit Cruise', - 'uploader_id': 'khanacademy', + 'uploader': 'Khan Academy', + 'uploader_id': '@khanacademy', + 'uploader_url': 'https://www.youtube.com/@khanacademy', 'upload_date': '20120411', 'timestamp': 1334170113, 'license': 'cc-by-nc-sa', + 'live_status': 'not_live', + 'channel': 'Khan Academy', + 'channel_id': 
'UC4a-Gbdw7vOaccHmFo40b9g', + 'channel_url': 'https://www.youtube.com/channel/UC4a-Gbdw7vOaccHmFo40b9g', + 'channel_is_verified': True, + 'playable_in_embed': True, + 'categories': ['Education'], + 'creators': ['Brit Cruise'], + 'tags': [], + 'age_limit': 0, + 'availability': 'public', + 'comment_count': int, + 'channel_follower_count': int, + 'thumbnail': str, + 'view_count': int, + 'like_count': int, + 'heatmap': list, }, 'add_ie': ['Youtube'], } - def _parse_component_props(self, component_props): - video = component_props['tutorialPageData']['contentModel'] - info = self._parse_video(video) - author_names = video.get('authorNames') - info.update({ - 'uploader': ', '.join(author_names) if author_names else None, - 'timestamp': parse_iso8601(video.get('dateAdded')), - 'license': video.get('kaUserLicense'), - }) - return info + def _parse_component_props(self, component_props, display_id): + video = component_props['content'] + return { + **self._parse_video(video), + **traverse_obj(video, { + 'creators': ('authorNames', ..., {str}), + 'timestamp': ('dateAdded', {parse_iso8601}), + 'license': ('kaUserLicense', {str}), + }), + } class KhanAcademyUnitIE(KhanAcademyBaseIE): IE_NAME = 'khanacademy:unit' - _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)' - _TEST = { + _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('1,2', '')) + '/?(?:[?#&]|$)' + _TESTS = [{ 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography', 'info_dict': { - 'id': 'cryptography', + 'id': 'x48c910b6', 'title': 'Cryptography', 'description': 'How have humans protected their secret messages through history? 
What has changed today?', + 'display_id': 'computing/computer-science/cryptography', + '_old_archive_ids': ['khanacademyunit cryptography'], }, 'playlist_mincount': 31, - } + }, { + 'url': 'https://www.khanacademy.org/computing/computer-science', + 'info_dict': { + 'id': 'x301707a0', + 'title': 'Computer science theory', + 'description': 'md5:4b472a4646e6cf6ec4ccb52c4062f8ba', + 'display_id': 'computing/computer-science', + '_old_archive_ids': ['khanacademyunit computer-science'], + }, + 'playlist_mincount': 50, + }] + + def _parse_component_props(self, component_props, display_id): + course = component_props['course'] + selected_unit = traverse_obj(course, ( + 'unitChildren', lambda _, v: v['relativeUrl'] == f'/{display_id}', any)) or course - def _parse_component_props(self, component_props): - curation = component_props['curation'] + def build_entry(entry): + return self.url_result(urljoin( + 'https://www.khanacademy.org', entry['canonicalUrl']), + KhanAcademyIE, title=entry.get('translatedTitle')) - entries = [] - tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or [] - for tutorial_number, tutorial in enumerate(tutorials, 1): - chapter_info = { - 'chapter': tutorial.get('title'), - 'chapter_number': tutorial_number, - 'chapter_id': tutorial.get('id'), - } - for content_item in (tutorial.get('contentItems') or []): - if content_item.get('kind') == 'Video': - info = self._parse_video(content_item) - info.update(chapter_info) - entries.append(info) + entries = traverse_obj(selected_unit, ( + (('unitChildren', ...), None), 'allOrderedChildren', ..., 'curatedChildren', + lambda _, v: v['contentKind'] == 'Video' and v['canonicalUrl'], {build_entry})) return self.playlist_result( - entries, curation.get('unit'), curation.get('title'), - curation.get('description')) + entries, + display_id=display_id, + **traverse_obj(selected_unit, { + 'id': ('id', {str}), + 'title': ('translatedTitle', {str}), + 'description': 
('translatedDescription', {str}), + '_old_archive_ids': ('slug', {str}, {lambda x: [make_archive_id(self, x)] if x else None}), + })) diff --git a/yt_dlp/extractor/kicker.py b/yt_dlp/extractor/kicker.py index a2c7dd4..4ab6751 100644 --- a/yt_dlp/extractor/kicker.py +++ b/yt_dlp/extractor/kicker.py @@ -20,8 +20,8 @@ class KickerIE(InfoExtractor): 'age_limit': 0, 'thumbnail': r're:https://s\d+\.dmcdn\.net/v/T-x741YeYAx8aSZ0Z/x1080', 'tags': ['published', 'category.InternationalSoccer'], - 'upload_date': '20220608' - } + 'upload_date': '20220608', + }, }, { 'url': 'https://www.kicker.de/ex-unioner-in-der-bezirksliga-felix-kroos-vereinschallenge-in-pankow-902825/video', 'info_dict': { @@ -39,7 +39,7 @@ class KickerIE(InfoExtractor): 'uploader': 'kicker.de', 'description': 'md5:0c2060c899a91c8bf40f578f78c5846f', 'like_count': int, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/kinja.py b/yt_dlp/extractor/kinja.py index f4e5c4c..99c8a12 100644 --- a/yt_dlp/extractor/kinja.py +++ b/yt_dlp/extractor/kinja.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, -) from ..utils import ( int_or_none, parse_iso8601, @@ -32,7 +30,7 @@ class KinjaEmbedIE(InfoExtractor): ajax/inset| embed/video )/iframe\?.*?\bid=''' - _VALID_URL = r'''(?x)https?://%s%s + _VALID_URL = rf'''(?x)https?://{_DOMAIN_REGEX}{_COMMON_REGEX} (?P<type> fb| imgur| @@ -49,7 +47,7 @@ class KinjaEmbedIE(InfoExtractor): vimeo| vine| youtube-(?:list|video) - )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX) + )-(?P<id>[^&]+)''' _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//{_DOMAIN_REGEX})?{_COMMON_REGEX}(?:(?!\1).)+)\1'] _TESTS = [{ 'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621', @@ -116,7 +114,7 @@ class KinjaEmbedIE(InfoExtractor): provider = self._PROVIDER_MAP.get(video_type) if provider: - video_id = compat_urllib_parse_unquote(video_id) + 
video_id = urllib.parse.unquote(video_id) if video_type == 'tumblr-post': video_id, blog = video_id.split('-', 1) result_url = provider[0] % (blog, video_id) @@ -145,7 +143,7 @@ class KinjaEmbedIE(InfoExtractor): poster = data.get('poster') or {} poster_id = poster.get('id') if poster_id: - thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg') + thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/{}.{}'.format(poster_id, poster.get('format') or 'jpg') return { 'id': video_id, @@ -190,10 +188,10 @@ class KinjaEmbedIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str), + 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], str), 'uploader': fmg.get('network'), 'duration': int_or_none(iptc.get('fileDuration')), 'formats': formats, - 'description': try_get(iptc, lambda x: x['description']['en'], compat_str), + 'description': try_get(iptc, lambda x: x['description']['en'], str), 'timestamp': parse_iso8601(iptc.get('dateReleased')), } diff --git a/yt_dlp/extractor/kommunetv.py b/yt_dlp/extractor/kommunetv.py index 432816c..5ec33a9 100644 --- a/yt_dlp/extractor/kommunetv.py +++ b/yt_dlp/extractor/kommunetv.py @@ -10,16 +10,16 @@ class KommunetvIE(InfoExtractor): 'info_dict': { 'id': '921', 'title': 'Bystyremøte', - 'ext': 'mp4' - } + 'ext': 'mp4', + }, } def _real_extract(self, url): video_id = self._match_id(url) headers = { - 'Accept': 'application/json' + 'Accept': 'application/json', } - data = self._download_json('https://oslo.kommunetv.no/api/streams?streamType=1&id=%s' % video_id, video_id, headers=headers) + data = self._download_json(f'https://oslo.kommunetv.no/api/streams?streamType=1&id={video_id}', video_id, headers=headers) title = data['stream']['title'] file = data['playlist'][0]['playlist'][0]['file'] url = update_url(file, query=None, fragment=None) @@ -27,5 +27,5 @@ class 
KommunetvIE(InfoExtractor): return { 'id': video_id, 'formats': formats, - 'title': title + 'title': title, } diff --git a/yt_dlp/extractor/kompas.py b/yt_dlp/extractor/kompas.py index 8bad961..2ef076c 100644 --- a/yt_dlp/extractor/kompas.py +++ b/yt_dlp/extractor/kompas.py @@ -16,7 +16,7 @@ class KompasVideoIE(JixieBaseIE): 'categories': ['news'], 'thumbnail': 'https://video.jixie.media/1001/164474/164474_1280x720.jpg', 'tags': 'count:9', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py index c78a7b9..6ec5b59 100644 --- a/yt_dlp/extractor/koo.py +++ b/yt_dlp/extractor/koo.py @@ -19,9 +19,9 @@ class KooIE(InfoExtractor): 'uploader_id': 'ytdlpTestAccount', 'uploader': 'yt-dlpTestAccount', 'duration': 7000, - 'upload_date': '20210921' + 'upload_date': '20210921', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { # Test for koo with long title 'url': 'https://www.kooapp.com/koo/laxman_kumarDBFEC/33decbf7-5e1e-4bb8-bfd7-04744a064361', 'info_dict': { @@ -33,9 +33,9 @@ class KooIE(InfoExtractor): 'uploader_id': 'laxman_kumarDBFEC', 'uploader': 'Laxman Kumar 🇮🇳', 'duration': 46000, - 'upload_date': '20210920' + 'upload_date': '20210920', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { # Test for audio 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a2a9c88e-ce4b-4d2d-952f-d06361c5b602', 'info_dict': { @@ -47,9 +47,9 @@ class KooIE(InfoExtractor): 'uploader_id': 'ytdlpTestAccount', 'uploader': 'yt-dlpTestAccount', 'duration': 214000, - 'upload_date': '20210921' + 'upload_date': '20210921', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { # Test for video 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1', 'info_dict': { @@ -61,9 +61,9 @@ class KooIE(InfoExtractor): 'uploader_id': 'ytdlpTestAccount', 'uploader': 'yt-dlpTestAccount', 'duration': 14000, - 'upload_date': '20210921' + 
'upload_date': '20210921', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { # Test for link 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/01bf5b94-81a5-4d8e-a387-5f732022e15a', 'skip': 'No video/audio found at the provided url.', @@ -83,10 +83,11 @@ class KooIE(InfoExtractor): }] def _real_extract(self, url): - id = self._match_id(url) - data_json = self._download_json(f'https://www.kooapp.com/apiV1/ku/{id}?limit=20&offset=0&showSimilarKoos=true', id)['parentContent'] + video_id = self._match_id(url) + data_json = self._download_json( + f'https://www.kooapp.com/apiV1/ku/{video_id}?limit=20&offset=0&showSimilarKoos=true', video_id)['parentContent'] item_json = next(content['items'][0] for content in data_json - if try_get(content, lambda x: x['items'][0]['id']) == id) + if try_get(content, lambda x: x['items'][0]['id']) == video_id) media_json = item_json['mediaMap'] formats = [] @@ -98,12 +99,12 @@ class KooIE(InfoExtractor): 'ext': 'mp4', }) if video_m3u8_url: - formats.extend(self._extract_m3u8_formats(video_m3u8_url, id, fatal=False, ext='mp4')) + formats.extend(self._extract_m3u8_formats(video_m3u8_url, video_id, fatal=False, ext='mp4')) if not formats: self.raise_no_formats('No video/audio found at the provided url.', expected=True) return { - 'id': id, + 'id': video_id, 'title': clean_html(item_json.get('title')), 'description': f'{clean_html(item_json.get("title"))}\n\n{clean_html(item_json.get("enTransliteration"))}', 'timestamp': item_json.get('createdAt'), diff --git a/yt_dlp/extractor/kth.py b/yt_dlp/extractor/kth.py index e17c6db..76899fd 100644 --- a/yt_dlp/extractor/kth.py +++ b/yt_dlp/extractor/kth.py @@ -16,13 +16,12 @@ class KTHIE(InfoExtractor): 'timestamp': 1647345358, 'upload_date': '20220315', 'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f', - } + }, } def _real_extract(self, url): video_id = self._match_id(url) - result = self.url_result( - smuggle_url('kaltura:308:%s' % video_id, { + return 
self.url_result( + smuggle_url(f'kaltura:308:{video_id}', { 'service_url': 'https://api.kaltura.nordu.net'}), 'Kaltura') - return result diff --git a/yt_dlp/extractor/ku6.py b/yt_dlp/extractor/ku6.py index 31b4ea0..00e814c 100644 --- a/yt_dlp/extractor/ku6.py +++ b/yt_dlp/extractor/ku6.py @@ -10,7 +10,7 @@ class Ku6IE(InfoExtractor): 'id': 'JG-8yS14xzBr4bCn1pu0xw', 'ext': 'f4v', 'title': 'techniques test', - } + }, } def _real_extract(self, url): @@ -19,12 +19,12 @@ class Ku6IE(InfoExtractor): title = self._html_search_regex( r'<h1 title=.*>(.*?)</h1>', webpage, 'title') - dataUrl = 'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id - jsonData = self._download_json(dataUrl, video_id) - downloadUrl = jsonData['data']['f'] + data_url = f'http://v.ku6.com/fetchVideo4Player/{video_id}.html' + json_data = self._download_json(data_url, video_id) + download_url = json_data['data']['f'] return { 'id': video_id, 'title': title, - 'url': downloadUrl + 'url': download_url, } diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py index b776671..80b6b55 100644 --- a/yt_dlp/extractor/kuwo.py +++ b/yt_dlp/extractor/kuwo.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( ExtractorError, InAdvancePagedList, @@ -18,7 +18,7 @@ class KuwoBaseIE(InfoExtractor): {'format': 'mp3-192', 'ext': 'mp3', 'br': '192kmp3', 'abr': 192, 'preference': 70}, {'format': 'mp3-128', 'ext': 'mp3', 'br': '128kmp3', 'abr': 128, 'preference': 60}, {'format': 'wma', 'ext': 'wma', 'preference': 20}, - {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10} + {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}, ] def _get_formats(self, song_id, tolerate_ip_deny=False): @@ -27,21 +27,21 @@ class KuwoBaseIE(InfoExtractor): query = { 'format': file_format['ext'], 'br': file_format.get('br', ''), - 'rid': 'MUSIC_%s' % song_id, + 'rid': f'MUSIC_{song_id}', 'type': 'convert_url', - 'response': 
'url' + 'response': 'url', } song_url = self._download_webpage( 'http://antiserver.kuwo.cn/anti.s', - song_id, note='Download %s url info' % file_format['format'], + song_id, note='Download {} url info'.format(file_format['format']), query=query, headers=self.geo_verification_headers(), ) if song_url == 'IPDeny' and not tolerate_ip_deny: raise ExtractorError('This song is blocked in this region', expected=True) - if song_url.startswith('http://') or song_url.startswith('https://'): + if song_url.startswith(('http://', 'https://')): formats.append({ 'url': song_url, 'format_id': file_format['format'], @@ -66,7 +66,7 @@ class KuwoIE(KuwoBaseIE): 'title': '爱我别走', 'creator': '张震岳', 'upload_date': '20080122', - 'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c' + 'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c', }, 'skip': 'this song has been offline because of copyright issues', }, { @@ -113,7 +113,7 @@ class KuwoIE(KuwoBaseIE): publish_time = None if album_id is not None: album_info_page = self._download_webpage( - 'http://www.kuwo.cn/album/%s/' % album_id, song_id, + f'http://www.kuwo.cn/album/{album_id}/', song_id, note='Download album detail info', errnote='Unable to get album detail info') @@ -160,7 +160,7 @@ class KuwoAlbumIE(InfoExtractor): 'album name') album_intro = remove_start( clean_html(get_element_by_id('intro', webpage)), - '%s简介:' % album_name) + f'{album_name}简介:') entries = [ self.url_result(song_url, 'Kuwo') for song_url in re.findall( @@ -238,12 +238,12 @@ class KuwoSingerIE(InfoExtractor): def page_func(page_num): webpage = self._download_webpage( 'http://www.kuwo.cn/artist/contentMusicsAjax', - singer_id, note='Download song list page #%d' % (page_num + 1), - errnote='Unable to get song list page #%d' % (page_num + 1), + singer_id, note=f'Download song list page #{page_num + 1}', + errnote=f'Unable to get song list page #{page_num + 1}', query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE}) return [ - 
self.url_result(compat_urlparse.urljoin(url, song_url), 'Kuwo') + self.url_result(urllib.parse.urljoin(url, song_url), 'Kuwo') for song_url in re.findall( r'<div[^>]+class="name"><a[^>]+href="(/yinyue/\d+)', webpage) @@ -280,7 +280,7 @@ class KuwoCategoryIE(InfoExtractor): category_desc = remove_start( get_element_by_id('intro', webpage).strip(), - '%s简介:' % category_name) + f'{category_name}简介:') if category_desc == '暂无': category_desc = None @@ -288,7 +288,7 @@ class KuwoCategoryIE(InfoExtractor): r'var\s+jsonm\s*=\s*([^;]+);', webpage, 'category songs'), category_id) entries = [ - self.url_result('http://www.kuwo.cn/yinyue/%s/' % song['musicrid'], 'Kuwo') + self.url_result('http://www.kuwo.cn/yinyue/{}/'.format(song['musicrid']), 'Kuwo') for song in jsonm['musiclist'] ] return self.playlist_result(entries, category_id, category_name, category_desc) @@ -314,16 +314,16 @@ class KuwoMvIE(KuwoBaseIE): 'format': 'mv', }, } - _FORMATS = KuwoBaseIE._FORMATS + [ + _FORMATS = [ + *KuwoBaseIE._FORMATS, {'format': 'mkv', 'ext': 'mkv', 'preference': 250}, - {'format': 'mp4', 'ext': 'mp4', 'preference': 200}, - ] + {'format': 'mp4', 'ext': 'mp4', 'preference': 200}] def _real_extract(self, url): song_id = self._match_id(url) webpage = self._download_webpage( - url, song_id, note='Download mv detail info: %s' % song_id, - errnote='Unable to get mv detail info: %s' % song_id) + url, song_id, note=f'Download mv detail info: {song_id}', + errnote=f'Unable to get mv detail info: {song_id}') mobj = re.search( r'<h1[^>]+title="(?P<song>[^"]+)">[^<]+<span[^>]+title="(?P<singer>[^"]+)"', @@ -337,8 +337,8 @@ class KuwoMvIE(KuwoBaseIE): formats = self._get_formats(song_id, tolerate_ip_deny=True) mv_url = self._download_webpage( - 'http://www.kuwo.cn/yy/st/mvurl?rid=MUSIC_%s' % song_id, - song_id, note='Download %s MV URL' % song_id) + f'http://www.kuwo.cn/yy/st/mvurl?rid=MUSIC_{song_id}', + song_id, note=f'Download {song_id} MV URL') formats.append({ 'url': mv_url, 'format_id': 'mv', 
diff --git a/yt_dlp/extractor/la7.py b/yt_dlp/extractor/la7.py index f5fd241..20a5235 100644 --- a/yt_dlp/extractor/la7.py +++ b/yt_dlp/extractor/la7.py @@ -93,7 +93,7 @@ class LA7IE(InfoExtractor): 'description': self._og_search_description(webpage, default=None), 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'formats': formats, - 'upload_date': unified_strdate(self._search_regex(r'datetime="(.+?)"', webpage, 'upload_date', fatal=False)) + 'upload_date': unified_strdate(self._search_regex(r'datetime="(.+?)"', webpage, 'upload_date', fatal=False)), } diff --git a/yt_dlp/extractor/laracasts.py b/yt_dlp/extractor/laracasts.py new file mode 100644 index 0000000..4494c4b --- /dev/null +++ b/yt_dlp/extractor/laracasts.py @@ -0,0 +1,114 @@ +import json + +from .common import InfoExtractor +from .vimeo import VimeoIE +from ..utils import ( + clean_html, + extract_attributes, + get_element_html_by_id, + int_or_none, + parse_duration, + str_or_none, + unified_strdate, + url_or_none, + urljoin, +) +from ..utils.traversal import traverse_obj + + +class LaracastsBaseIE(InfoExtractor): + def _get_prop_data(self, url, display_id): + webpage = self._download_webpage(url, display_id) + return traverse_obj( + get_element_html_by_id('app', webpage), + ({extract_attributes}, 'data-page', {json.loads}, 'props')) + + def _parse_episode(self, episode): + if not traverse_obj(episode, 'vimeoId'): + self.raise_login_required('This video is only available for subscribers.') + return self.url_result( + VimeoIE._smuggle_referrer( + f'https://player.vimeo.com/video/{episode["vimeoId"]}', 'https://laracasts.com/'), + VimeoIE, url_transparent=True, + **traverse_obj(episode, { + 'id': ('id', {int}, {str_or_none}), + 'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}), + 'title': ('title', {clean_html}), + 'season_number': ('chapter', {int_or_none}), + 'episode_number': ('position', {int_or_none}), + 'description': ('body', {clean_html}), + 'thumbnail': 
('largeThumbnail', {url_or_none}), + 'duration': ('length', {int_or_none}), + 'date': ('dateSegments', 'published', {unified_strdate}), + })) + + +class LaracastsIE(LaracastsBaseIE): + IE_NAME = 'laracasts' + _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+/episodes/\d+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1', + 'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc', + 'info_dict': { + 'id': '922040563', + 'title': 'Hello, Laravel', + 'ext': 'mp4', + 'duration': 519, + 'date': '20240312', + 'thumbnail': 'https://laracasts.s3.amazonaws.com/videos/thumbnails/youtube/30-days-to-learn-laravel-11-1.png', + 'description': 'md5:ddd658bb241975871d236555657e1dd1', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Episode 1', + 'uploader': 'Laracasts', + 'uploader_id': 'user20182673', + 'uploader_url': 'https://vimeo.com/user20182673', + }, + 'expected_warnings': ['Failed to parse XML'], # TODO: Remove when vimeo extractor is fixed + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + return self._parse_episode(self._get_prop_data(url, display_id)['lesson']) + + +class LaracastsPlaylistIE(LaracastsBaseIE): + IE_NAME = 'laracasts:series' + _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11', + 'info_dict': { + 'title': '30 Days to Learn Laravel', + 'id': '210', + 'thumbnail': 'https://laracasts.s3.amazonaws.com/series/thumbnails/social-cards/30-days-to-learn-laravel-11.png?v=2', + 'duration': 30600.0, + 'modified_date': '20240511', + 'description': 'md5:27c260a1668a450984e8f901579912dd', + 'categories': ['Frameworks'], + 'tags': ['Laravel'], + 'display_id': '30-days-to-learn-laravel-11', + }, + 'playlist_count': 30, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + series = self._get_prop_data(url, 
display_id)['series'] + + metadata = { + 'display_id': display_id, + **traverse_obj(series, { + 'title': ('title', {str}), + 'id': ('id', {int}, {str_or_none}), + 'description': ('body', {clean_html}), + 'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any), + 'duration': ('runTime', {parse_duration}), + 'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}), + 'tags': ('topics', ..., 'name', {str}), + 'modified_date': ('lastUpdated', {unified_strdate}), + }), + } + + return self.playlist_result(traverse_obj( + series, ('chapters', ..., 'episodes', lambda _, v: v['vimeoId'], {self._parse_episode})), **metadata) diff --git a/yt_dlp/extractor/laxarxames.py b/yt_dlp/extractor/laxarxames.py index e157f7c..f6d515b 100644 --- a/yt_dlp/extractor/laxarxames.py +++ b/yt_dlp/extractor/laxarxames.py @@ -58,7 +58,7 @@ class LaXarxaMesIE(InfoExtractor): 'https://api.laxarxames.cat/Media/GetMediaPlayInfo', video_id, data=json.dumps({ 'MediaId': int(video_id), - 'StreamType': 'MAIN' + 'StreamType': 'MAIN', }).encode(), headers={ 'Authorization': f'Bearer {self._TOKEN}', 'X-Tenantorigin': 'https://laxarxames.cat', diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index dcb44d0..c764d49 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -24,7 +24,7 @@ from ..utils import ( class LBRYBaseIE(InfoExtractor): _BASE_URL_REGEX = r'(?x)(?:https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/|lbry://)' _CLAIM_ID_REGEX = r'[0-9a-f]{1,40}' - _OPT_CLAIM_ID = '[^$@:/?#&]+(?:[:#]%s)?' % _CLAIM_ID_REGEX + _OPT_CLAIM_ID = f'[^$@:/?#&]+(?:[:#]{_CLAIM_ID_REGEX})?' 
_SUPPORTED_STREAM_TYPES = ['video', 'audio'] _PAGE_SIZE = 50 @@ -35,7 +35,7 @@ class LBRYBaseIE(InfoExtractor): headers['x-lbry-auth-token'] = token response = self._download_json( 'https://api.lbry.tv/api/v1/proxy', - display_id, 'Downloading %s JSON metadata' % resource, + display_id, f'Downloading {resource} JSON metadata', headers=headers, data=json.dumps({ 'method': method, @@ -54,7 +54,7 @@ class LBRYBaseIE(InfoExtractor): def _permanent_url(self, url, claim_name, claim_id): return urljoin( url.replace('lbry://', 'https://lbry.tv/'), - '/%s:%s' % (claim_name, claim_id)) + f'/{claim_name}:{claim_id}') def _parse_stream(self, stream, url): stream_type = traverse_obj(stream, ('value', 'stream_type', {str})) @@ -169,9 +169,9 @@ class LBRYIE(LBRYBaseIE): 'lbc', 'lbry', 'start', - 'tutorial' + 'tutorial', ], - } + }, }, { # Audio 'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e', @@ -194,7 +194,7 @@ class LBRYIE(LBRYBaseIE): 'thumbnail': 'https://spee.ch/d/0bc63b0e6bf1492d.png', 'license': 'None', 'uploader_id': '@LBRYFoundation', - } + }, }, { 'url': 'https://odysee.com/@gardeningincanada:b/plants-i-will-never-grow-again.-the:e', 'md5': 'c35fac796f62a14274b4dc2addb5d0ba', @@ -216,7 +216,7 @@ class LBRYIE(LBRYBaseIE): 'formats': 'mincount:3', 'thumbnail': 'https://thumbnails.lbry.com/AgHSc_HzrrE', 'license': 'Copyrighted (contact publisher)', - } + }, }, { # HLS live stream (might expire) 'url': 'https://odysee.com/@RT:fd/livestream_RT:d', @@ -239,7 +239,7 @@ class LBRYIE(LBRYBaseIE): 'license': 'None', 'uploader_id': '@RT', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { # original quality format w/higher resolution than HLS formats 'url': 'https://odysee.com/@wickedtruths:2/Biotechnological-Invasion-of-Skin-(April-2023):4', diff --git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py index 6287419..69148be 100644 --- a/yt_dlp/extractor/lcp.py +++ b/yt_dlp/extractor/lcp.py @@ -66,7 +66,7 @@ class LcpIE(InfoExtractor): 
webpage = self._download_webpage(url, display_id) play_url = self._search_regex( - r'<iframe[^>]+src=(["\'])(?P<url>%s?(?:(?!\1).)*)\1' % LcpPlayIE._VALID_URL, + rf'<iframe[^>]+src=(["\'])(?P<url>{LcpPlayIE._VALID_URL}?(?:(?!\1).)*)\1', webpage, 'play iframe', default=None, group='url') if not play_url: diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py index 1a3ada1..6157f3d 100644 --- a/yt_dlp/extractor/lecture2go.py +++ b/yt_dlp/extractor/lecture2go.py @@ -25,7 +25,7 @@ class Lecture2GoIE(InfoExtractor): 'params': { # m3u8 download 'skip_download': True, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 90f0268..4cfb872 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -49,7 +49,7 @@ class LecturioBaseIE(InfoExtractor): r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response, 'errors', default=None) if errors: - raise ExtractorError('Unable to login: %s' % errors, expected=True) + raise ExtractorError(f'Unable to login: {errors}', expected=True) raise ExtractorError('Unable to log in') @@ -130,7 +130,7 @@ class LecturioIE(LecturioBaseIE): f = { 'url': file_url, 'format_id': label, - 'filesize': float_or_none(filesize, invscale=1000) + 'filesize': float_or_none(filesize, invscale=1000), } if label: mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label) @@ -200,7 +200,7 @@ class LecturioCourseIE(LecturioBaseIE): if lecture_url: lecture_url = urljoin(url, lecture_url) else: - lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id) + lecture_url = f'https://app.lecturio.com/#/lecture/c/{course_id}/{lecture_id}' entries.append(self.url_result( lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id)) return self.playlist_result( diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py index a113b3d..58baa3f 100644 --- a/yt_dlp/extractor/leeco.py +++ b/yt_dlp/extractor/leeco.py @@ -1,15 +1,12 @@ +import 
base64 import datetime as dt import hashlib import re import time +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_ord, - compat_str, - compat_urllib_parse_urlencode, -) +from ..compat import compat_ord from ..utils import ( ExtractorError, determine_ext, @@ -140,7 +137,7 @@ class LeIE(InfoExtractor): def get_flash_urls(media_url, format_id): nodes_data = self._download_json( media_url, media_id, - 'Download JSON metadata for format %s' % format_id, + f'Download JSON metadata for format {format_id}', query={ 'm3v': 1, 'format': 1, @@ -150,7 +147,7 @@ class LeIE(InfoExtractor): req = self._request_webpage( nodes_data['nodelist'][0]['location'], media_id, - note='Downloading m3u8 information for format %s' % format_id) + note=f'Downloading m3u8 information for format {format_id}') m3u8_data = self.decrypt_m3u8(req.read()) @@ -173,7 +170,7 @@ class LeIE(InfoExtractor): f = { 'url': format_url, 'ext': determine_ext(format_data[1]), - 'format_id': '%s-%s' % (protocol, format_id), + 'format_id': f'{protocol}-{format_id}', 'protocol': 'm3u8_native' if protocol == 'hls' else 'http', 'quality': int_or_none(format_id), } @@ -207,18 +204,18 @@ class LePlaylistIE(InfoExtractor): 'info_dict': { 'id': '46177', 'title': '美人天下', - 'description': 'md5:395666ff41b44080396e59570dbac01c' + 'description': 'md5:395666ff41b44080396e59570dbac01c', }, - 'playlist_count': 35 + 'playlist_count': 35, }, { 'url': 'http://tv.le.com/izt/wuzetian/index.html', 'info_dict': { 'id': 'wuzetian', 'title': '武媚娘传奇', - 'description': 'md5:e12499475ab3d50219e5bba00b3cb248' + 'description': 'md5:e12499475ab3d50219e5bba00b3cb248', }, # This playlist contains some extra videos other than the drama itself - 'playlist_mincount': 96 + 'playlist_mincount': 96, }, { 'url': 'http://tv.le.com/pzt/lswjzzjc/index.shtml', # This series is moved to http://www.le.com/tv/10005297.html @@ -233,7 +230,7 @@ class LePlaylistIE(InfoExtractor): @classmethod def 
suitable(cls, url): - return False if LeIE.suitable(url) else super(LePlaylistIE, cls).suitable(url) + return False if LeIE.suitable(url) else super().suitable(url) def _real_extract(self, url): playlist_id = self._match_id(url) @@ -294,7 +291,7 @@ class LetvCloudIE(InfoExtractor): salt = 'fbeh5player12c43eccf2bec3300344' items = ['cf', 'ran', 'uu', 'bver', 'vu'] input_data = ''.join([item + obj[item] for item in items]) + salt - obj['sign'] = hashlib.md5(input_data.encode('utf-8')).hexdigest() + obj['sign'] = hashlib.md5(input_data.encode()).hexdigest() def _get_formats(self, cf, uu, vu, media_id): def get_play_json(cf, timestamp): @@ -305,12 +302,12 @@ class LetvCloudIE(InfoExtractor): 'format': 'json', 'uu': uu, 'vu': vu, - 'ran': compat_str(timestamp), + 'ran': str(timestamp), } self.sign_data(data) return self._download_json( - 'http://api.letvcloud.com/gpc.php?' + compat_urllib_parse_urlencode(data), - media_id, 'Downloading playJson data for type %s' % cf) + 'http://api.letvcloud.com/gpc.php?' 
+ urllib.parse.urlencode(data), + media_id, f'Downloading playJson data for type {cf}') play_json = get_play_json(cf, time.time()) # The server time may be different from local time @@ -319,14 +316,14 @@ class LetvCloudIE(InfoExtractor): if not play_json.get('data'): if play_json.get('message'): - raise ExtractorError('Letv cloud said: %s' % play_json['message'], expected=True) + raise ExtractorError('Letv cloud said: {}'.format(play_json['message']), expected=True) elif play_json.get('code'): raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True) else: raise ExtractorError('Letv cloud returned an unknown error') def b64decode(s): - return compat_b64decode(s).decode('utf-8') + return base64.b64decode(s).decode('utf-8') formats = [] for media in play_json['data']['video_info']['media'].values(): @@ -349,7 +346,7 @@ class LetvCloudIE(InfoExtractor): vu_mobj = re.search(r'vu=([\w]+)', url) if not uu_mobj or not vu_mobj: - raise ExtractorError('Invalid URL: %s' % url, expected=True) + raise ExtractorError(f'Invalid URL: {url}', expected=True) uu = uu_mobj.group(1) vu = vu_mobj.group(1) @@ -359,6 +356,6 @@ class LetvCloudIE(InfoExtractor): return { 'id': media_id, - 'title': 'Video %s' % media_id, + 'title': f'Video {media_id}', 'formats': formats, } diff --git a/yt_dlp/extractor/lego.py b/yt_dlp/extractor/lego.py index 46fc7a9..5a98cc7 100644 --- a/yt_dlp/extractor/lego.py +++ b/yt_dlp/extractor/lego.py @@ -72,7 +72,7 @@ class LEGOIE(InfoExtractor): # https://contentfeed.services.lego.com/api/v2/item/[VIDEO_ID]?culture=[LOCALE]&contentType=Video 'https://services.slingshot.lego.com/mediaplayer/v2', video_id, query={ - 'videoId': '%s_%s' % (uuid.UUID(video_id), locale), + 'videoId': f'{uuid.UUID(video_id)}_{locale}', }, headers=self.geo_verification_headers()) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 451: @@ -111,7 +111,7 @@ class LEGOIE(InfoExtractor): 'abr': quality[0], 'height': quality[1], 
'width': quality[2], - }), + }) formats.append(f) subtitles = {} @@ -123,7 +123,7 @@ class LEGOIE(InfoExtractor): video_version = video.get('VideoVersion') if net_storage_path and invariant_id and video_file_id and video_version: subtitles.setdefault(locale[:2], []).append({ - 'url': 'https://lc-mediaplayerns-live-s.legocdn.com/public/%s/%s_%s_%s_%s_sub.srt' % (net_storage_path, invariant_id, video_file_id, locale, video_version), + 'url': f'https://lc-mediaplayerns-live-s.legocdn.com/public/{net_storage_path}/{invariant_id}_{video_file_id}_{locale}_{video_version}_sub.srt', }) return { diff --git a/yt_dlp/extractor/lenta.py b/yt_dlp/extractor/lenta.py index fe01bda..105ec37 100644 --- a/yt_dlp/extractor/lenta.py +++ b/yt_dlp/extractor/lenta.py @@ -45,7 +45,7 @@ class LentaIE(InfoExtractor): default=None) if video_id: return self.url_result( - 'eagleplatform:lentaru.media.eagleplatform.com:%s' % video_id, + f'eagleplatform:lentaru.media.eagleplatform.com:{video_id}', ie='EaglePlatform', video_id=video_id) return self.url_result(url, ie='Generic') diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py index 2979939..6185605 100644 --- a/yt_dlp/extractor/libraryofcongress.py +++ b/yt_dlp/extractor/libraryofcongress.py @@ -73,7 +73,7 @@ class LibraryOfCongressIE(InfoExtractor): webpage, 'media id', group='id') data = self._download_json( - 'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id, + f'https://media.loc.gov/services/v1/media?id={media_id}&context=json', media_id)['mediaObject'] derivative = data['derivatives'][0] diff --git a/yt_dlp/extractor/libsyn.py b/yt_dlp/extractor/libsyn.py index 29bbb03..4ca521a 100644 --- a/yt_dlp/extractor/libsyn.py +++ b/yt_dlp/extractor/libsyn.py @@ -18,7 +18,7 @@ class LibsynIE(InfoExtractor): 'info_dict': { 'id': '6385796', 'ext': 'mp3', - 'title': "Champion Minded - Developing a Growth Mindset", + 'title': 'Champion Minded - Developing a Growth Mindset', # description 
fetched using another request: # http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796 # 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.', @@ -34,7 +34,7 @@ class LibsynIE(InfoExtractor): 'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career', 'upload_date': '20150818', 'thumbnail': 're:^https?://.*', - } + }, }] def _real_extract(self, url): @@ -56,7 +56,7 @@ class LibsynIE(InfoExtractor): r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None) or get_element_by_class('podcast-title', webpage))) - title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title + title = f'{podcast_title} - {episode_title}' if podcast_title else episode_title formats = [] for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')): diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py index ea150a5..60d50b1 100644 --- a/yt_dlp/extractor/lifenews.py +++ b/yt_dlp/extractor/lifenews.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..utils import ( ExtractorError, determine_ext, @@ -31,7 +28,7 @@ class LifeNewsIE(InfoExtractor): 'timestamp': 1344154740, 'upload_date': '20120805', 'view_count': int, - } + }, }, { # single video embedded via iframe 'url': 'https://life.ru/t/новости/152125', @@ -44,7 +41,7 @@ class LifeNewsIE(InfoExtractor): 'timestamp': 1427961840, 'upload_date': '20150402', 'view_count': int, - } + }, }, { # two videos embedded via iframe 'url': 'https://life.ru/t/новости/153461', @@ -100,7 +97,7 @@ class LifeNewsIE(InfoExtractor): webpage) if not video_urls and not iframe_links: - raise ExtractorError('No media links available for %s' % video_id) + raise ExtractorError(f'No media links available for {video_id}') title = remove_end( 
self._og_search_title(webpage), @@ -125,14 +122,14 @@ class LifeNewsIE(InfoExtractor): def make_entry(video_id, video_url, index=None): cur_info = dict(common_info) cur_info.update({ - 'id': video_id if not index else '%s-video%s' % (video_id, index), + 'id': video_id if not index else f'{video_id}-video{index}', 'url': video_url, - 'title': title if not index else '%s (Видео %s)' % (title, index), + 'title': title if not index else f'{title} (Видео {index})', }) return cur_info def make_video_entry(video_id, video_url, index=None): - video_url = compat_urlparse.urljoin(url, video_url) + video_url = urllib.parse.urljoin(url, video_url) return make_entry(video_id, video_url, index) def make_iframe_entry(video_id, video_url, index=None): @@ -174,7 +171,7 @@ class LifeEmbedIE(InfoExtractor): 'ext': 'mp4', 'title': 'e50c2dec2867350528e2574c899b8291', 'thumbnail': r're:http://.*\.jpg', - } + }, }, { # with 1080p 'url': 'https://embed.life.ru/video/e50c2dec2867350528e2574c899b8291', @@ -207,17 +204,17 @@ class LifeEmbedIE(InfoExtractor): video_id).get('playlist', {}) if playlist: master = playlist.get('master') - if isinstance(master, compat_str) and determine_ext(master) == 'm3u8': - extract_m3u8(compat_urlparse.urljoin(url, master)) + if isinstance(master, str) and determine_ext(master) == 'm3u8': + extract_m3u8(urllib.parse.urljoin(url, master)) original = playlist.get('original') - if isinstance(original, compat_str): + if isinstance(original, str): extract_original(original) thumbnail = playlist.get('image') # Old rendition fallback if not formats: for video_url in re.findall(r'"file"\s*:\s*"([^"]+)', webpage): - video_url = compat_urlparse.urljoin(url, video_url) + video_url = urllib.parse.urljoin(url, video_url) if determine_ext(video_url) == 'm3u8': extract_m3u8(video_url) else: diff --git a/yt_dlp/extractor/likee.py b/yt_dlp/extractor/likee.py index 3244631..f6a51c8 100644 --- a/yt_dlp/extractor/likee.py +++ b/yt_dlp/extractor/likee.py @@ -162,7 +162,7 @@ class 
LikeeUserIE(InfoExtractor): 'count': self._PAGE_SIZE, 'lastPostId': last_post_id, 'tabType': 0, - }).encode('utf-8'), + }).encode(), headers={'content-type': 'application/json'}, note=f'Get user info with lastPostId #{last_post_id}') items = traverse_obj(user_videos, ('data', 'videoList')) diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py index 1ff091d..763a014 100644 --- a/yt_dlp/extractor/limelight.py +++ b/yt_dlp/extractor/limelight.py @@ -32,8 +32,8 @@ class LimelightBaseIE(InfoExtractor): r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage): entries.append(cls.url_result( - smuggle('limelight:%s:%s' % (lm[kind], video_id)), - 'Limelight%s' % kind, video_id)) + smuggle(f'limelight:{lm[kind]}:{video_id}'), + f'Limelight{kind}', video_id)) for mobj in re.finditer( # As per [1] class attribute should be exactly equal to # LimelightEmbeddedPlayerFlash but numerous examples seen @@ -48,14 +48,14 @@ class LimelightBaseIE(InfoExtractor): ''', webpage): kind, video_id = mobj.group('kind'), mobj.group('id') entries.append(cls.url_result( - smuggle('limelight:%s:%s' % (kind, video_id)), - 'Limelight%s' % kind.capitalize(), video_id)) + smuggle(f'limelight:{kind}:{video_id}'), + f'Limelight{kind.capitalize()}', video_id)) # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page) for video_id in re.findall( r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})', webpage): entries.append(cls.url_result( - smuggle('limelight:media:%s' % video_id), + smuggle(f'limelight:media:{video_id}'), LimelightMediaIE.ie_key(), video_id)) return entries @@ -66,7 +66,7 @@ class LimelightBaseIE(InfoExtractor): try: return self._download_json( self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method), - item_id, 'Downloading PlaylistService %s JSON' % method, + item_id, 
f'Downloading PlaylistService {method} JSON', fatal=fatal, headers=headers) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 403: @@ -134,7 +134,7 @@ class LimelightBaseIE(InfoExtractor): for cdn_host, http_host in CDN_HOSTS: if cdn_host not in rtmp.group('host').lower(): continue - http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:]) + http_url = 'http://{}/{}'.format(http_host, rtmp.group('playpath')[4:]) urls.append(http_url) if self._is_valid_url(http_url, video_id, http_format_id): http_fmt = fmt.copy() @@ -351,7 +351,7 @@ class LimelightChannelListIE(LimelightBaseIE): channel_list_id, 'getMobileChannelListById') entries = [ - self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel') + self.url_result('limelight:channel:{}'.format(channel['id']), 'LimelightChannel') for channel in channel_list['channelList']] return self.playlist_result( diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index 2a7c6f0..c8c8ae5 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -59,14 +59,14 @@ class LinkedInLearningBaseIE(LinkedInBaseIE): if video_slug: query.update({ 'videoSlug': video_slug, - 'resolution': '_%s' % resolution, + 'resolution': f'_{resolution}', }) sub = ' %dp' % resolution api_url = 'https://www.linkedin.com/learning-api/detailedCourses' if not self._get_cookies(api_url).get('JSESSIONID'): self.raise_login_required() return self._download_json( - api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={ + api_url, video_slug, f'Downloading{sub} JSON metadata', headers={ 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value, }, query=query)['elements'][0] @@ -78,7 +78,7 @@ class LinkedInLearningBaseIE(LinkedInBaseIE): return mobj.group(1) def _get_video_id(self, video_data, course_slug, video_slug): - return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug) + return self._get_urn_id(video_data) or 
f'{course_slug}/{video_slug}' class LinkedInIE(LinkedInBaseIE): @@ -92,7 +92,7 @@ class LinkedInIE(LinkedInBaseIE): 'description': 'md5:2998a31f6f479376dd62831f53a80f71', 'uploader': 'Mishal K.', 'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$', - 'like_count': int + 'like_count': int, }, }, { 'url': 'https://www.linkedin.com/posts/the-mathworks_2_what-is-mathworks-cloud-center-activity-7151241570371948544-4Gu7', @@ -104,7 +104,7 @@ class LinkedInIE(LinkedInBaseIE): 'uploader': 'MathWorks', 'thumbnail': 're:^https?://media.licdn.com/dms/image/.*$', 'like_count': int, - 'subtitles': 'mincount:1' + 'subtitles': 'mincount:1', }, }] @@ -159,9 +159,10 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): for line, (line_dict, next_dict) in enumerate(itertools.zip_longest(transcript_lines, transcript_lines[1:])): start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption'] end_time = next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1 - srt_data += '%d\n%s --> %s\n%s\n\n' % (line + 1, srt_subtitles_timecode(start_time), - srt_subtitles_timecode(end_time), - caption) + srt_data += ( + f'{line + 1}\n' + f'{srt_subtitles_timecode(start_time)} --> {srt_subtitles_timecode(end_time)}\n' + f'{caption}\n\n') return srt_data def _real_extract(self, url): @@ -176,7 +177,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): progressive_url = video_url_data.get('progressiveUrl') if progressive_url: formats.append({ - 'format_id': 'progressive-%dp' % height, + 'format_id': f'progressive-{height}p', 'url': progressive_url, 'ext': 'mp4', 'height': height, @@ -208,7 +209,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): if transcript_lines: subtitles['en'] = [{ 'ext': 'srt', - 'data': self.json2srt(transcript_lines, duration) + 'data': self.json2srt(transcript_lines, duration), }] return { @@ -222,7 +223,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): # It seems like this would be correctly handled by default # 
However, unless someone can confirm this, the old # behaviour is being kept as-is - '_format_sort_fields': ('res', 'source_preference') + '_format_sort_fields': ('res', 'source_preference'), } @@ -241,7 +242,7 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE): @classmethod def suitable(cls, url): - return False if LinkedInLearningIE.suitable(url) else super(LinkedInLearningCourseIE, cls).suitable(url) + return False if LinkedInLearningIE.suitable(url) else super().suitable(url) def _real_extract(self, url): course_slug = self._match_id(url) @@ -259,7 +260,7 @@ class LinkedInLearningCourseIE(LinkedInLearningBaseIE): '_type': 'url_transparent', 'id': self._get_video_id(video, course_slug, video_slug), 'title': video.get('title'), - 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug), + 'url': f'https://www.linkedin.com/learning/{course_slug}/{video_slug}', 'chapter': chapter_title, 'chapter_number': chapter_number, 'chapter_id': chapter_id, diff --git a/yt_dlp/extractor/liputan6.py b/yt_dlp/extractor/liputan6.py index c4477b9..a29234a 100644 --- a/yt_dlp/extractor/liputan6.py +++ b/yt_dlp/extractor/liputan6.py @@ -25,8 +25,8 @@ class Liputan6IE(InfoExtractor): 'tags': ['perawat indonesia', 'rumah sakit', 'Medan', 'viral hari ini', 'viral', 'enamplus'], 'channel': 'Default Channel', 'dislike_count': int, - 'upload_date': '20220707' - } + 'upload_date': '20220707', + }, }, { 'url': 'https://www.liputan6.com/tv/read/5007719/video-program-minyakita-minyak-goreng-kemasan-sederhana-seharga-rp-14-ribu', 'info_dict': { @@ -49,7 +49,7 @@ class Liputan6IE(InfoExtractor): 'thumbnail': 'https://thumbor.prod.vidiocdn.com/AAIOjz-64hKojjdw5hr0oNNEeJg=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7082543/program-minyakita-minyak-goreng-kemasan-sederhana-seharga-rp14-ribu-_-liputan-6-7d9fbb.jpg', 'channel': 'Liputan 6 Pagi', 'view_count': int, - } + }, }] def _real_extract(self, url): diff --git 
a/yt_dlp/extractor/listennotes.py b/yt_dlp/extractor/listennotes.py index 4ebc9be..61eae95 100644 --- a/yt_dlp/extractor/listennotes.py +++ b/yt_dlp/extractor/listennotes.py @@ -31,7 +31,7 @@ class ListenNotesIE(InfoExtractor): 'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg', 'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/', 'cast': ['Tim O’Reilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'], - } + }, }, { 'url': 'https://www.listennotes.com/podcasts/ask-noah-show/episode-177-wireguard-with-lwEA3154JzG/', 'md5': '62fb4ffe7fc525632a1138bf72a5ce53', @@ -47,7 +47,7 @@ class ListenNotesIE(InfoExtractor): 'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/', 'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg', 'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'], - } + }, }] def _clean_description(self, description): @@ -82,5 +82,5 @@ class ListenNotesIE(InfoExtractor): 'cast': ('nlp_entities', ..., 'name'), 'channel_url': 'channel_url', 'channel_id': 'channel_short_uuid', - }) + }), } diff --git a/yt_dlp/extractor/litv.py b/yt_dlp/extractor/litv.py index 1003fb2..93f926a 100644 --- a/yt_dlp/extractor/litv.py +++ b/yt_dlp/extractor/litv.py @@ -113,7 +113,7 @@ class LiTVIE(InfoExtractor): endpoint = 'getMainUrlNoAuth' video_data = self._download_json( f'https://www.litv.tv/vod/ajax/{endpoint}', video_id, - data=json.dumps(payload).encode('utf-8'), + data=json.dumps(payload).encode(), headers={'Content-Type': 'application/json'}) if not video_data.get('fullpath'): @@ -121,8 +121,8 @@ class LiTVIE(InfoExtractor): if error_msg == 'vod.error.outsideregionerror': self.raise_geo_restricted('This video is available in Taiwan only') if 
error_msg: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_msg), expected=True) - raise ExtractorError('Unexpected result from %s' % self.IE_NAME) + raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True) + raise ExtractorError(f'Unexpected result from {self.IE_NAME}') formats = self._extract_m3u8_formats( video_data['fullpath'], video_id, ext='mp4', diff --git a/yt_dlp/extractor/livejournal.py b/yt_dlp/extractor/livejournal.py index 96bd8b2..c61f9be 100644 --- a/yt_dlp/extractor/livejournal.py +++ b/yt_dlp/extractor/livejournal.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import int_or_none @@ -14,7 +13,7 @@ class LiveJournalIE(InfoExtractor): 'title': 'Истребители против БПЛА', 'upload_date': '20190624', 'timestamp': 1561406715, - } + }, } def _real_extract(self, url): @@ -23,7 +22,7 @@ class LiveJournalIE(InfoExtractor): record = self._parse_json(self._search_regex( r'Site\.page\s*=\s*({.+?});', webpage, 'page data'), video_id)['video']['record'] - storage_id = compat_str(record['storageid']) + storage_id = str(record['storageid']) title = record.get('name') if title: # remove filename extension(.mp4, .mov, etc...) 
diff --git a/yt_dlp/extractor/livestream.py b/yt_dlp/extractor/livestream.py index a05a0fa..7f7947e 100644 --- a/yt_dlp/extractor/livestream.py +++ b/yt_dlp/extractor/livestream.py @@ -1,8 +1,8 @@ import itertools import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_str, compat_urlparse from ..utils import ( determine_ext, find_xpath_attr, @@ -41,13 +41,13 @@ class LivestreamIE(InfoExtractor): 'like_count': int, 'view_count': int, 'comment_count': int, - 'thumbnail': r're:^http://.*\.jpg$' - } + 'thumbnail': r're:^http://.*\.jpg$', + }, }, { 'url': 'https://livestream.com/coheedandcambria/websterhall', 'info_dict': { 'id': '1585861', - 'title': 'Live From Webster Hall' + 'title': 'Live From Webster Hall', }, 'playlist_mincount': 1, }, { @@ -69,8 +69,8 @@ class LivestreamIE(InfoExtractor): 'timestamp': 1331042383, 'thumbnail': 'http://img.new.livestream.com/videos/0000000000000372/cacbeed6-fb68-4b5e-ad9c-e148124e68a9_640x427.jpg', 'duration': 15.332, - 'ext': 'mp4' - } + 'ext': 'mp4', + }, }, { 'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640', 'only_matching': True, @@ -92,7 +92,7 @@ class LivestreamIE(InfoExtractor): for vn in video_nodes: tbr = int_or_none(vn.attrib.get('system-bitrate'), 1000) furl = ( - update_url_query(compat_urlparse.urljoin(base, vn.attrib['src']), { + update_url_query(urllib.parse.urljoin(base, vn.attrib['src']), { 'v': '3.0.3', 'fp': 'WIN% 14,0,0,145', })) @@ -108,7 +108,7 @@ class LivestreamIE(InfoExtractor): return formats, {} def _extract_video_info(self, video_data): - video_id = compat_str(video_data['id']) + video_id = str(video_data['id']) FORMAT_KEYS = ( ('sd', 'progressive_url'), @@ -123,7 +123,7 @@ class LivestreamIE(InfoExtractor): if ext == 'm3u8': continue bitrate = int_or_none(self._search_regex( - r'(\d+)\.%s' % ext, video_url, 'bitrate', default=None)) + rf'(\d+)\.{ext}', video_url, 'bitrate', 
default=None)) formats.append({ 'url': video_url, 'format_id': format_id, @@ -169,7 +169,7 @@ class LivestreamIE(InfoExtractor): } def _extract_stream_info(self, stream_info): - broadcast_id = compat_str(stream_info['broadcast_id']) + broadcast_id = str(stream_info['broadcast_id']) is_live = stream_info.get('is_live') formats = [] @@ -199,8 +199,8 @@ class LivestreamIE(InfoExtractor): } def _generate_event_playlist(self, event_data): - event_id = compat_str(event_data['id']) - account_id = compat_str(event_data['owner_account_id']) + event_id = str(event_data['id']) + account_id = str(event_data['owner_account_id']) feed_root_url = self._API_URL_TEMPLATE % (account_id, event_id) + '/feed.json' stream_info = event_data.get('stream_info') @@ -212,15 +212,14 @@ class LivestreamIE(InfoExtractor): if last_video is None: info_url = feed_root_url else: - info_url = '{root}?&id={id}&newer=-1&type=video'.format( - root=feed_root_url, id=last_video) + info_url = f'{feed_root_url}?&id={last_video}&newer=-1&type=video' videos_info = self._download_json( info_url, event_id, f'Downloading page {i}')['data'] videos_info = [v['data'] for v in videos_info if v['type'] == 'video'] if not videos_info: break for v in videos_info: - v_id = compat_str(v['id']) + v_id = str(v['id']) yield self.url_result( f'http://livestream.com/accounts/{account_id}/events/{event_id}/videos/{v_id}', LivestreamIE, v_id, v.get('caption')) @@ -278,7 +277,7 @@ class LivestreamOriginalIE(InfoExtractor): }] def _extract_video_info(self, user, video_id): - api_url = 'http://x%sx.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id=%s' % (user, video_id) + api_url = f'http://x{user}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={video_id}' info = self._download_xml(api_url, video_id) item = info.find('channel').find('item') @@ -335,7 +334,7 @@ class LivestreamOriginalIE(InfoExtractor): entries = [{ '_type': 'url', - 'url': compat_urlparse.urljoin(url, p), + 'url': 
urllib.parse.urljoin(url, p), } for p in paths] return self.playlist_result(entries, folder_id) @@ -349,10 +348,10 @@ class LivestreamOriginalIE(InfoExtractor): return self._extract_folder(url, content_id) else: # this url is used on mobile devices - stream_url = 'http://x%sx.api.channel.livestream.com/3.0/getstream.json' % user + stream_url = f'http://x{user}x.api.channel.livestream.com/3.0/getstream.json' info = {} if content_id: - stream_url += '?id=%s' % content_id + stream_url += f'?id={content_id}' info = self._extract_video_info(user, content_id) else: content_id = user @@ -381,8 +380,7 @@ class LivestreamShortenerIE(InfoExtractor): _VALID_URL = r'https?://livestre\.am/(?P<id>.+)' def _real_extract(self, url): - mobj = self._match_valid_url(url) - id = mobj.group('id') - webpage = self._download_webpage(url, id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) return self.url_result(self._og_search_url(webpage)) diff --git a/yt_dlp/extractor/livestreamfails.py b/yt_dlp/extractor/livestreamfails.py index 0df6384..c18d05d 100644 --- a/yt_dlp/extractor/livestreamfails.py +++ b/yt_dlp/extractor/livestreamfails.py @@ -16,7 +16,7 @@ class LivestreamfailsIE(InfoExtractor): 'thumbnail': r're:^https?://.+', 'timestamp': 1656271785, 'upload_date': '20220626', - } + }, }, { 'url': 'https://livestreamfails.com/post/139200', 'only_matching': True, @@ -33,5 +33,5 @@ class LivestreamfailsIE(InfoExtractor): 'url': f'https://livestreamfails-video-prod.b-cdn.net/video/{api_response["videoId"]}', 'title': api_response.get('label'), 'creator': traverse_obj(api_response, ('streamer', 'label')), - 'thumbnail': format_field(api_response, 'imageId', 'https://livestreamfails-image-prod.b-cdn.net/image/%s') + 'thumbnail': format_field(api_response, 'imageId', 'https://livestreamfails-image-prod.b-cdn.net/image/%s'), } diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnkgo.py index 6282d2e..31a7cef 100644 --- a/yt_dlp/extractor/lnkgo.py +++ 
b/yt_dlp/extractor/lnkgo.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( clean_html, format_field, @@ -58,10 +57,10 @@ class LnkGoIE(InfoExtractor): display_id, video_id = self._match_valid_url(url).groups() video_info = self._download_json( - 'https://lnk.lt/api/main/video-page/%s/%s/false' % (display_id, video_id or '0'), + 'https://lnk.lt/api/main/video-page/{}/{}/false'.format(display_id, video_id or '0'), display_id)['videoConfig']['videoInfo'] - video_id = compat_str(video_info['id']) + video_id = str(video_info['id']) title = video_info['title'] prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4' formats = self._extract_m3u8_formats( @@ -98,9 +97,9 @@ class LnkIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'episode_number': 13431, 'series': 'Naujausi žinių reportažai', - 'episode': 'Episode 13431' + 'episode': 'Episode 13431', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://lnk.lt/istorijos-trumpai/152546', 'info_dict': { @@ -114,9 +113,9 @@ class LnkIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'episode_number': 1036, 'series': 'Istorijos trumpai', - 'episode': 'Episode 1036' + 'episode': 'Episode 1036', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://lnk.lt/gyvunu-pasaulis/151549', 'info_dict': { @@ -130,26 +129,26 @@ class LnkIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'episode_number': 16, 'series': 'Gyvūnų pasaulis', - 'episode': 'Episode 16' + 'episode': 'Episode 16', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] def _real_extract(self, url): - id = self._match_id(url) - video_json = self._download_json(f'https://lnk.lt/api/video/video-config/{id}', id)['videoInfo'] + video_id = self._match_id(url) + video_json = self._download_json(f'https://lnk.lt/api/video/video-config/{video_id}', video_id)['videoInfo'] 
formats, subtitles = [], {} if video_json.get('videoUrl'): - fmts, subs = self._extract_m3u8_formats_and_subtitles(video_json['videoUrl'], id) + fmts, subs = self._extract_m3u8_formats_and_subtitles(video_json['videoUrl'], video_id) formats.extend(fmts) subtitles = self._merge_subtitles(subtitles, subs) if video_json.get('videoFairplayUrl') and not video_json.get('drm'): - fmts, subs = self._extract_m3u8_formats_and_subtitles(video_json['videoFairplayUrl'], id) + fmts, subs = self._extract_m3u8_formats_and_subtitles(video_json['videoFairplayUrl'], video_id) formats.extend(fmts) subtitles = self._merge_subtitles(subtitles, subs) return { - 'id': id, + 'id': video_id, 'title': video_json.get('title'), 'description': video_json.get('description'), 'view_count': video_json.get('viewsCount'), diff --git a/yt_dlp/extractor/lovehomeporn.py b/yt_dlp/extractor/lovehomeporn.py index ba5a13a..63b75a3 100644 --- a/yt_dlp/extractor/lovehomeporn.py +++ b/yt_dlp/extractor/lovehomeporn.py @@ -15,7 +15,7 @@ class LoveHomePornIE(NuevoBaseIE): }, 'params': { 'skip_download': True, - } + }, } def _real_extract(self, url): @@ -24,10 +24,10 @@ class LoveHomePornIE(NuevoBaseIE): display_id = mobj.group('display_id') info = self._extract_nuevo( - 'http://lovehomeporn.com/media/nuevo/config.php?key=%s' % video_id, + f'http://lovehomeporn.com/media/nuevo/config.php?key={video_id}', video_id) info.update({ 'display_id': display_id, - 'age_limit': 18 + 'age_limit': 18, }) return info diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py index 80d4d1c..1a0b6da 100644 --- a/yt_dlp/extractor/lrt.py +++ b/yt_dlp/extractor/lrt.py @@ -22,8 +22,8 @@ class LRTStreamIE(LRTBaseIE): 'id': 'lrt-opus', 'live_status': 'is_live', 'title': 're:^LRT Opus.+$', - 'ext': 'mp4' - } + 'ext': 'mp4', + }, }] def _real_extract(self, url): @@ -44,7 +44,7 @@ class LRTStreamIE(LRTBaseIE): 'formats': formats, 'subtitles': subtitles, 'is_live': True, - 'title': f'{self._og_search_title(webpage)} - {stream_title}' 
+ 'title': f'{self._og_search_title(webpage)} - {stream_title}', } @@ -62,7 +62,7 @@ class LRTVODIE(LRTBaseIE): 'timestamp': 1604079000, 'upload_date': '20201030', 'tags': ['LRT TELEVIZIJA', 'Beatos virtuvė', 'Beata Nicholson', 'Makaronai', 'Baklažanai', 'Vakarienė', 'Receptas'], - 'thumbnail': 'https://www.lrt.lt/img/2020/10/30/764041-126478-1287x836.jpg' + 'thumbnail': 'https://www.lrt.lt/img/2020/10/30/764041-126478-1287x836.jpg', }, }, { # direct mp3 download diff --git a/yt_dlp/extractor/lsm.py b/yt_dlp/extractor/lsm.py index 35a831f..f5be08f 100644 --- a/yt_dlp/extractor/lsm.py +++ b/yt_dlp/extractor/lsm.py @@ -33,7 +33,7 @@ class LSMLREmbedIE(InfoExtractor): 'duration': 1823, 'title': '#138 Nepilnīgā kompensējamo zāļu sistēma pat mēnešiem dzenā pacientus pa aptiekām', 'thumbnail': 'https://pic.latvijasradio.lv/public/assets/media/9/d/gallery_fd4675ac.jpg', - } + }, }, { 'url': 'https://radioteatris.lsm.lv/lv/embed/?id=&show=1270&theme=white&size=16x9', 'info_dict': { @@ -59,7 +59,7 @@ class LSMLREmbedIE(InfoExtractor): 'title': 'Jens Ahlboms "Spārni". 
Radioizrāde ar Mārtiņa Freimaņa mūziku', 'thumbnail': 'https://radioteatris.lsm.lv/public/assets/shows/62f13023a457c.jpg', 'duration': 1788, - } + }, }, { 'url': 'https://lr1.lsm.lv/lv/embed/?id=166557&show=0&theme=white&size=16x9', 'info_dict': { @@ -168,7 +168,7 @@ class LSMLTVEmbedIE(InfoExtractor): 'upload_date': '20231121', 'title': 'D23-6000-105_cetstud', 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg', - } + }, }, { 'url': 'https://ltv.lsm.lv/embed?enablesdkjs=1&c=eyJpdiI6IncwVzZmUFk2MU12enVWK1I3SUcwQ1E9PSIsInZhbHVlIjoid3FhV29vamc3T2sxL1RaRmJ5Rm1GTXozU0o2dVczdUtLK0cwZEZJMDQ2a3ZIRG5DK2pneGlnbktBQy9uazVleHN6VXhxdWIweWNvcHRDSnlISlNYOHlVZ1lpcTUrcWZSTUZPQW14TVdkMW9aOUtRWVNDcFF4eWpHNGcrT0VZbUNFQStKQk91cGpndW9FVjJIa0lpbkh3PT0iLCJtYWMiOiIyZGI1NDJlMWRlM2QyMGNhOGEwYTM2MmNlN2JlOGRhY2QyYjdkMmEzN2RlOTEzYTVkNzI1ODlhZDlhZjU4MjQ2IiwidGFnIjoiIn0=', 'md5': 'a1711e190fe680fdb68fd8413b378e87', @@ -198,7 +198,7 @@ class LSMLTVEmbedIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/@LTV16plus', 'like_count': int, 'description': 'md5:7ff0c42ba971e3c13e4b8a2ff03b70b5', - } + }, }] def _real_extract(self, url): @@ -239,7 +239,7 @@ class LSMReplayIE(InfoExtractor): 'upload_date': '20231121', 'title': '4. studija. 
Zolitūdes traģēdija un Inčupes stacija', 'thumbnail': 'https://ltv.lsm.lv/storage/media/8/7/large/5/1f9604e1.jpg', - } + }, }, { 'url': 'https://replay.lsm.lv/lv/ieraksts/lr/183522/138-nepilniga-kompensejamo-zalu-sistema-pat-menesiem-dzena-pacientus-pa-aptiekam', 'md5': '719b33875cd1429846eeeaeec6df2830', @@ -252,7 +252,7 @@ class LSMReplayIE(InfoExtractor): 'upload_date': '20231102', 'timestamp': 1698921060, 'description': 'md5:7bac3b2dd41e44325032943251c357b1', - } + }, }, { 'url': 'https://replay.lsm.lv/ru/statja/ltv/311130/4-studija-zolitudes-tragedija-un-incupes-stacija', 'only_matching': True, diff --git a/yt_dlp/extractor/lumni.py b/yt_dlp/extractor/lumni.py index 5a95383..8c26f5b 100644 --- a/yt_dlp/extractor/lumni.py +++ b/yt_dlp/extractor/lumni.py @@ -12,7 +12,7 @@ class LumniIE(FranceTVBaseInfoExtractor): 'title': "L'homme et son environnement dans la révolution industrielle - L'ère de l'homme", 'thumbnail': 'https://assets.webservices.francetelevisions.fr/v1/assets/images/a7/17/9f/a7179f5f-63a5-4e11-8d4d-012ab942d905.jpg', 'duration': 230, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/lynda.py b/yt_dlp/extractor/lynda.py index 768ce91..bfd4619 100644 --- a/yt_dlp/extractor/lynda.py +++ b/yt_dlp/extractor/lynda.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..utils import ( ExtractorError, int_or_none, @@ -21,11 +18,11 @@ class LyndaBaseIE(InfoExtractor): @staticmethod def _check_error(json_string, key_or_keys): - keys = [key_or_keys] if isinstance(key_or_keys, compat_str) else key_or_keys + keys = [key_or_keys] if isinstance(key_or_keys, str) else key_or_keys for key in keys: error = json_string.get(key) if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError(f'Unable to login: {error}', expected=True) def _perform_login_step(self, form_html, fallback_action_url, extra_form_data, 
note, referrer_url): action_url = self._search_regex( @@ -33,7 +30,7 @@ class LyndaBaseIE(InfoExtractor): 'post url', default=fallback_action_url, group='url') if not action_url.startswith('http'): - action_url = compat_urlparse.urljoin(self._SIGNIN_URL, action_url) + action_url = urllib.parse.urljoin(self._SIGNIN_URL, action_url) form_data = self._hidden_inputs(form_html) form_data.update(extra_form_data) @@ -44,7 +41,7 @@ class LyndaBaseIE(InfoExtractor): headers={ 'Referer': referrer_url, 'X-Requested-With': 'XMLHttpRequest', - }, expected_status=(418, 500, )) + }, expected_status=(418, 500)) self._check_error(response, ('email', 'password', 'ErrorMessage')) @@ -97,8 +94,8 @@ class LyndaIE(LyndaBaseIE): 'id': '114408', 'ext': 'mp4', 'title': 'Using the exercise files', - 'duration': 68 - } + 'duration': 68, + }, }, { 'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0', 'only_matching': True, @@ -116,7 +113,7 @@ class LyndaIE(LyndaBaseIE): def _raise_unavailable(self, video_id): self.raise_login_required( - 'Video %s is only available for members' % video_id) + f'Video {video_id} is only available for members') def _real_extract(self, url): mobj = self._match_valid_url(url) @@ -137,8 +134,7 @@ class LyndaIE(LyndaBaseIE): query['courseId'] = course_id play = self._download_json( - 'https://www.lynda.com/ajax/course/%s/%s/play' - % (course_id, video_id), video_id, 'Downloading play JSON') + f'https://www.lynda.com/ajax/course/{course_id}/{video_id}/play', video_id, 'Downloading play JSON') if not play: self._raise_unavailable(video_id) @@ -154,7 +150,7 @@ class LyndaIE(LyndaBaseIE): continue formats.append({ 'url': format_url, - 'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id, + 'format_id': f'{cdn}-{format_id}' if cdn else format_id, 'height': int_or_none(format_id), }) @@ -174,12 +170,12 @@ class LyndaIE(LyndaBaseIE): if 'Status' in video: raise ExtractorError( - 'lynda returned error: %s' % video['Message'], 
expected=True) + 'lynda returned error: {}'.format(video['Message']), expected=True) if video.get('HasAccess') is False: self._raise_unavailable(video_id) - video_id = compat_str(video.get('ID') or video_id) + video_id = str(video.get('ID') or video_id) duration = int_or_none(video.get('DurationInSeconds')) title = video['Title'] @@ -193,7 +189,7 @@ class LyndaIE(LyndaBaseIE): 'width': int_or_none(f.get('Width')), 'height': int_or_none(f.get('Height')), 'filesize': int_or_none(f.get('FileSize')), - 'format_id': compat_str(f.get('Resolution')) if f.get('Resolution') else None, + 'format_id': str(f.get('Resolution')) if f.get('Resolution') else None, } for f in fmts if f.get('Url')]) prioritized_streams = video.get('PrioritizedStreams') @@ -202,7 +198,7 @@ class LyndaIE(LyndaBaseIE): formats.extend([{ 'url': video_url, 'height': int_or_none(format_id), - 'format_id': '%s-%s' % (prioritized_stream_id, format_id), + 'format_id': f'{prioritized_stream_id}-{format_id}', } for format_id, video_url in prioritized_stream.items()]) self._check_formats(formats, video_id) @@ -214,18 +210,16 @@ class LyndaIE(LyndaBaseIE): 'title': title, 'duration': duration, 'subtitles': subtitles, - 'formats': formats + 'formats': formats, } def _fix_subtitles(self, subs): srt = '' seq_counter = 0 - for pos in range(0, len(subs) - 1): - seq_current = subs[pos] + for seq_current, seq_next in zip(subs, subs[1:]): m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode']) if m_current is None: continue - seq_next = subs[pos + 1] m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode']) if m_next is None: continue @@ -234,12 +228,12 @@ class LyndaIE(LyndaBaseIE): text = seq_current['Caption'].strip() if text: seq_counter += 1 - srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (seq_counter, appear_time, disappear_time, text) + srt += f'{seq_counter}\r\n{appear_time} --> {disappear_time}\r\n{text}\r\n\r\n' if srt: return srt def _get_subtitles(self, video_id): - url = 
'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id + url = f'https://www.lynda.com/ajax/player?videoId={video_id}&type=transcript' subs = self._download_webpage( url, video_id, 'Downloading subtitles JSON', fatal=False) if not subs or 'Status="NotFound"' in subs: @@ -274,10 +268,10 @@ class LyndaCourseIE(LyndaBaseIE): course_path = mobj.group('coursepath') course_id = mobj.group('courseid') - item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path + item_template = f'https://www.lynda.com/{course_path}/%s-4.html' course = self._download_json( - 'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, + f'https://www.lynda.com/ajax/player?courseId={course_id}&type=course', course_id, 'Downloading course JSON', fatal=False) if not course: @@ -295,7 +289,7 @@ class LyndaCourseIE(LyndaBaseIE): if course.get('Status') == 'NotFound': raise ExtractorError( - 'Course %s does not exist' % course_id, expected=True) + f'Course {course_id} does not exist', expected=True) unaccessible_videos = 0 entries = [] @@ -316,13 +310,13 @@ class LyndaCourseIE(LyndaBaseIE): 'ie_key': LyndaIE.ie_key(), 'chapter': chapter.get('Title'), 'chapter_number': int_or_none(chapter.get('ChapterIndex')), - 'chapter_id': compat_str(chapter.get('ID')), + 'chapter_id': str(chapter.get('ID')), }) if unaccessible_videos > 0: self.report_warning( - '%s videos are only available for members (or paid members) and will not be downloaded. ' - % unaccessible_videos + self._ACCOUNT_CREDENTIALS_HINT) + f'{unaccessible_videos} videos are only available for members (or paid members) ' + f'and will not be downloaded. 
{self._ACCOUNT_CREDENTIALS_HINT}') course_title = course.get('Title') course_description = course.get('Description') diff --git a/yt_dlp/extractor/magentamusik.py b/yt_dlp/extractor/magentamusik.py index 9d86a1b..5bfc0a1 100644 --- a/yt_dlp/extractor/magentamusik.py +++ b/yt_dlp/extractor/magentamusik.py @@ -19,7 +19,7 @@ class MagentaMusikIE(InfoExtractor): 'categories': ['Musikkonzert'], 'release_year': 2023, 'location': 'Deutschland', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/mailru.py b/yt_dlp/extractor/mailru.py index 0f0550c..cca678f 100644 --- a/yt_dlp/extractor/mailru.py +++ b/yt_dlp/extractor/mailru.py @@ -4,7 +4,6 @@ import re import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote from ..utils import ( int_or_none, parse_duration, @@ -100,7 +99,7 @@ class MailRuIE(InfoExtractor): { 'url': 'https://videoapi.my.mail.ru/videos/embed/mail/cloud-strife/Games/2009.html', 'only_matching': True, - } + }, ] def _real_extract(self, url): @@ -109,7 +108,7 @@ class MailRuIE(InfoExtractor): video_id = None if meta_id: - meta_url = 'https://my.mail.ru/+/video/meta/%s' % meta_id + meta_url = f'https://my.mail.ru/+/video/meta/{meta_id}' else: video_id = mobj.group('idv1') if not video_id: @@ -138,7 +137,7 @@ class MailRuIE(InfoExtractor): # Fallback old approach if not video_data: video_data = self._download_json( - 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, + f'http://api.video.mail.ru/videos/{video_id}.json?new=1', video_id, 'Downloading video JSON') video_key = self._get_cookies('https://my.mail.ru').get('video_key') @@ -169,7 +168,7 @@ class MailRuIE(InfoExtractor): acc_id = meta_data.get('accId') item_id = meta_data.get('itemId') - content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id + content_id = f'{acc_id}_{item_id}' if acc_id and item_id else video_id thumbnail = meta_data.get('poster') duration = int_or_none(meta_data.get('duration')) @@ 
-192,7 +191,7 @@ class MailRuMusicSearchBaseIE(InfoExtractor): def _search(self, query, url, audio_id, limit=100, offset=0): search = self._download_json( 'https://my.mail.ru/cgi-bin/my/ajax', audio_id, - 'Downloading songs JSON page %d' % (offset // limit + 1), + f'Downloading songs JSON page {offset // limit + 1}', headers={ 'Referer': url, 'X-Requested-With': 'XMLHttpRequest', @@ -236,7 +235,7 @@ class MailRuMusicSearchBaseIE(InfoExtractor): artist = t.get('Author') or t.get('Author_Text_HTML') if track: - title = '%s - %s' % (artist, track) if artist else track + title = f'{artist} - {track}' if artist else track else: title = audio_id @@ -307,7 +306,7 @@ class MailRuMusicSearchIE(MailRuMusicSearchBaseIE): }] def _real_extract(self, url): - query = compat_urllib_parse_unquote(self._match_id(url)) + query = urllib.parse.unquote(self._match_id(url)) entries = [] diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py index fa12a6a..fb93505 100644 --- a/yt_dlp/extractor/mainstreaming.py +++ b/yt_dlp/extractor/mainstreaming.py @@ -30,9 +30,9 @@ class MainStreamingIE(InfoExtractor): }, 'expected_warnings': [ 'Ignoring alternative content ID: WDAF1KOWUpH3', - 'MainStreaming said: Live event is OFFLINE' + 'MainStreaming said: Live event is OFFLINE', ], - 'skip': 'live stream offline' + 'skip': 'live stream offline', }, { # playlist 'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/WDAF1KOWUpH3', @@ -40,7 +40,7 @@ class MainStreamingIE(InfoExtractor): 'id': 'WDAF1KOWUpH3', 'title': 'Playlist homepage', }, - 'playlist_mincount': 2 + 'playlist_mincount': 2, }, { # livestream 'url': 'https://webtools-859c1818ed614cc5b0047439470927b0.msvdn.net/embed/tDoFkZD3T1Lw', @@ -51,7 +51,7 @@ class MainStreamingIE(InfoExtractor): 'ext': 'mp4', 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster', }, - 'skip': 'live stream' + 'skip': 'live stream', }, { 'url': 
'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/EUlZfGWkGpOd?autoPlay=false', 'info_dict': { @@ -61,8 +61,8 @@ class MainStreamingIE(InfoExtractor): 'ext': 'mp4', 'live_status': 'not_live', 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster', - 'duration': 1512 - } + 'duration': 1512, + }, }, { # video without webtools- prefix 'url': 'https://f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/MfuWmzL2lGkA?autoplay=false&T=1635860445', @@ -73,8 +73,8 @@ class MainStreamingIE(InfoExtractor): 'ext': 'mp4', 'live_status': 'not_live', 'thumbnail': r're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster', - 'duration': 789.04 - } + 'duration': 789.04, + }, }, { # always-on livestream with DVR 'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/HVvPMzy', @@ -92,14 +92,14 @@ class MainStreamingIE(InfoExtractor): }, { # no host 'url': 'https://webtools.msvdn.net/embed/MfuWmzL2lGkA', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/amp_embed/tDoFkZD3T1Lw', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/content/tDoFkZD3T1Lw#', - 'only_matching': True - } + 'only_matching': True, + }, ] def _playlist_entries(self, host, playlist_content): @@ -111,7 +111,7 @@ class MainStreamingIE(InfoExtractor): 'id': content_id, 'duration': int_or_none(traverse_obj(entry, ('duration', 'totalSeconds'))), 'title': entry.get('title'), - 'url': f'https://{host}/embed/{content_id}' + 'url': f'https://{host}/embed/{content_id}', } @staticmethod @@ -205,5 +205,5 @@ class MainStreamingIE(InfoExtractor): 'duration': parse_duration(content_info.get('duration')), 'tags': content_info.get('tags'), 'subtitles': subtitles, - 'thumbnail': urljoin(self._get_webtools_base_url(host), f'image/{video_id}/poster') + 'thumbnail': urljoin(self._get_webtools_base_url(host), f'image/{video_id}/poster'), } diff --git 
a/yt_dlp/extractor/mangomolo.py b/yt_dlp/extractor/mangomolo.py index efaf66f..2231f71 100644 --- a/yt_dlp/extractor/mangomolo.py +++ b/yt_dlp/extractor/mangomolo.py @@ -1,8 +1,7 @@ +import base64 +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote, -) from ..utils import classproperty, int_or_none @@ -33,14 +32,14 @@ class MangomoloBaseIE(InfoExtractor): def _real_extract(self, url): page_id = self._get_real_id(self._match_id(url)) webpage = self._download_webpage( - 'https://player.mangomolo.com/v1/%s?%s' % (self._TYPE, url.split('?')[1]), page_id) + 'https://player.mangomolo.com/v1/{}?{}'.format(self._TYPE, url.split('?')[1]), page_id) hidden_inputs = self._hidden_inputs(webpage) m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native' format_url = self._html_search_regex( [ r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)', - r'<a[^>]+href="(rtsp://[^"]+)"' + r'<a[^>]+href="(rtsp://[^"]+)"', ], webpage, 'format url') formats = self._extract_wowza_formats( format_url, page_id, m3u8_entry_protocol, ['smil']) @@ -70,4 +69,4 @@ class MangomoloLiveIE(MangomoloBaseIE): _IS_LIVE = True def _get_real_id(self, page_id): - return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode() + return base64.b64decode(urllib.parse.unquote(page_id)).decode() diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py index 44c321c..1dd0b15 100644 --- a/yt_dlp/extractor/manoto.py +++ b/yt_dlp/extractor/manoto.py @@ -20,11 +20,11 @@ class ManotoTVIE(InfoExtractor): 'title': 'کارول و جان', 'description': 'md5:d0fff1f8ba5c6775d312a00165d1a97e', 'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$', - 'ext': 'mp4' + 'ext': 'mp4', }, 'params': { 'skip_download': 'm3u8', - } + }, }, { 'url': 'https://www.manototv.com/episode/12576', 'info_dict': { @@ -37,11 +37,11 @@ class ManotoTVIE(InfoExtractor): 'title': 'سه ماه تعطیلی', 'description': 'سه ماه تعطیلی فیلمی به کارگردانی و نویسندگی 
شاپور قریب ساختهٔ سال ۱۳۵۶ است.', 'thumbnail': r're:^https?://.*\.(jpeg|png|jpg)$', - 'ext': 'mp4' + 'ext': 'mp4', }, 'params': { 'skip_download': 'm3u8', - } + }, }] def _real_extract(self, url): @@ -93,7 +93,7 @@ class ManotoTVShowIE(InfoExtractor): entries = [ self.url_result( - 'https://www.manototv.com/episode/%s' % item['slideID'], ie=ManotoTVIE.ie_key(), video_id=item['slideID']) + 'https://www.manototv.com/episode/{}'.format(item['slideID']), ie=ManotoTVIE.ie_key(), video_id=item['slideID']) for item in playlist] return self.playlist_result(entries, show_id, title, description) @@ -111,7 +111,7 @@ class ManotoTVLiveIE(InfoExtractor): }, 'params': { 'skip_download': 'm3u8', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py index 2aa3a3c..8caa8f8 100644 --- a/yt_dlp/extractor/manyvids.py +++ b/yt_dlp/extractor/manyvids.py @@ -44,7 +44,7 @@ class ManyVidsIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, ) + real_url = f'https://www.manyvids.com/video/{video_id}/gtm.js' try: webpage = self._download_webpage(real_url, video_id) except Exception: @@ -75,7 +75,7 @@ class ManyVidsIE(InfoExtractor): def mung_title(s): if uploader: - s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s) + s = re.sub(rf'^\s*{re.escape(uploader)}\s+[|-]', '', s) return txt_or_none(s) title = ( @@ -106,7 +106,7 @@ class ManyVidsIE(InfoExtractor): 'vid': video_id, }), headers={ 'Referer': url, - 'X-Requested-With': 'XMLHttpRequest' + 'X-Requested-With': 'XMLHttpRequest', }) formats = [] @@ -138,7 +138,7 @@ class ManyVidsIE(InfoExtractor): def get_likes(): likes = self._search_regex( - r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, ), + rf'''(<a\b[^>]*\bdata-id\s*=\s*(['"]){video_id}\2[^>]*>)''', webpage, 'likes', default='') likes = extract_attributes(likes) return int_or_none(likes.get('data-likes')) 
diff --git a/yt_dlp/extractor/markiza.py b/yt_dlp/extractor/markiza.py index ca465ea..088b60d 100644 --- a/yt_dlp/extractor/markiza.py +++ b/yt_dlp/extractor/markiza.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( orderedSet, parse_duration, @@ -60,11 +59,11 @@ class MarkizaIE(InfoExtractor): info.update({ 'id': video_id, 'title': try_get( - data, lambda x: x['details']['name'], compat_str), + data, lambda x: x['details']['name'], str), }) else: info['duration'] = parse_duration( - try_get(data, lambda x: x['details']['duration'], compat_str)) + try_get(data, lambda x: x['details']['duration'], str)) return info @@ -104,7 +103,7 @@ class MarkizaPageIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if MarkizaIE.suitable(url) else super(MarkizaPageIE, cls).suitable(url) + return False if MarkizaIE.suitable(url) else super().suitable(url) def _real_extract(self, url): playlist_id = self._match_id(url) @@ -116,7 +115,7 @@ class MarkizaPageIE(InfoExtractor): url, playlist_id, expected_status=500) entries = [ - self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id) + self.url_result(f'http://videoarchiv.markiza.sk/video/{video_id}') for video_id in orderedSet(re.findall( r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)', webpage))] diff --git a/yt_dlp/extractor/massengeschmacktv.py b/yt_dlp/extractor/massengeschmacktv.py index 1490e9b..43c0873 100644 --- a/yt_dlp/extractor/massengeschmacktv.py +++ b/yt_dlp/extractor/massengeschmacktv.py @@ -22,7 +22,7 @@ class MassengeschmackTVIE(InfoExtractor): 'id': 'fktv202', 'ext': 'mp4', 'title': 'Fernsehkritik-TV #202', - 'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg' + 'thumbnail': 'https://cache.massengeschmack.tv/img/mag/fktv202.jpg', }, } diff --git a/yt_dlp/extractor/masters.py b/yt_dlp/extractor/masters.py index c3c58d7..4aa2c98 100644 --- a/yt_dlp/extractor/masters.py +++ 
b/yt_dlp/extractor/masters.py @@ -15,7 +15,7 @@ class MastersIE(InfoExtractor): 'title': 'Sungjae Im: Thursday Interview 2022', 'upload_date': '20220407', 'thumbnail': r're:^https?://.*\.jpg$', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/matchtv.py b/yt_dlp/extractor/matchtv.py index a67fa9f..93799fe 100644 --- a/yt_dlp/extractor/matchtv.py +++ b/yt_dlp/extractor/matchtv.py @@ -1,51 +1,35 @@ -import random - from .common import InfoExtractor -from ..utils import xpath_text class MatchTVIE(InfoExtractor): - _VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)' + _VALID_URL = [ + r'https?://matchtv\.ru/on-air/?(?:$|[?#])', + r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])', + ] _TESTS = [{ - 'url': 'http://matchtv.ru/#live-player', + 'url': 'http://matchtv.ru/on-air/', 'info_dict': { 'id': 'matchtv-live', - 'ext': 'flv', + 'ext': 'mp4', 'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', - 'is_live': True, + 'live_status': 'is_live', }, 'params': { 'skip_download': True, }, }, { - 'url': 'http://matchtv.ru/on-air/', + 'url': 'https://video.matchtv.ru/iframe/channel/106', 'only_matching': True, }] def _real_extract(self, url): video_id = 'matchtv-live' - video_url = self._download_json( - 'http://player.matchtv.ntvplus.tv/player/smil', video_id, - query={ - 'ts': '', - 'quality': 'SD', - 'contentId': '561d2c0df7159b37178b4567', - 'sign': '', - 'includeHighlights': '0', - 'userId': '', - 'sessionId': random.randint(1, 1000000000), - 'contentType': 'channel', - 'timeShift': '0', - 'platform': 'portal', - }, - headers={ - 'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf', - })['data']['videoUrl'] - f4m_url = xpath_text(self._download_xml(video_url, video_id), './to') - formats = self._extract_f4m_formats(f4m_url, video_id) + webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id) + video_url = self._html_search_regex( + 
r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL').replace('/feed/', '/media/') + '.m3u8' return { 'id': video_id, 'title': 'Матч ТВ - Прямой эфир', 'is_live': True, - 'formats': formats, + 'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True), } diff --git a/yt_dlp/extractor/mdr.py b/yt_dlp/extractor/mdr.py index 49f5b49..46097fa 100644 --- a/yt_dlp/extractor/mdr.py +++ b/yt_dlp/extractor/mdr.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( determine_ext, int_or_none, @@ -104,7 +105,7 @@ class MDRIE(InfoExtractor): webpage, 'data url', group='url').replace(r'\/', '/') doc = self._download_xml( - compat_urlparse.urljoin(url, data_url), video_id) + urllib.parse.urljoin(url, data_url), video_id) title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True) @@ -118,7 +119,7 @@ class MDRIE(InfoExtractor): 'progressiveDownload', 'dynamicHttpStreamingRedirector', 'adaptiveHttpStreamingRedirector'): - url_el = asset.find('./%sUrl' % source) + url_el = asset.find(f'./{source}Url') if url_el is None: continue diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py index d040fb4..d64dbfe 100644 --- a/yt_dlp/extractor/medaltv.py +++ b/yt_dlp/extractor/medaltv.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, float_or_none, @@ -31,7 +30,7 @@ class MedalTVIE(InfoExtractor): 'view_count': int, 'like_count': int, 'duration': 13, - } + }, }, { 'url': 'https://medal.tv/games/cod-cold-war/clips/2mA60jWAGQCBH', 'md5': 'fc7a3e4552ae8993c1c4006db46be447', @@ -50,7 +49,7 @@ class MedalTVIE(InfoExtractor): 'view_count': int, 'like_count': int, 'duration': 23, - } + }, }, { 'url': 'https://medal.tv/games/cod-cold-war/clips/2um24TWdty0NA', 'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148', @@ -69,7 +68,7 @@ class MedalTVIE(InfoExtractor): 
'view_count': int, 'like_count': int, 'duration': 9, - } + }, }, { 'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9', 'only_matching': True, @@ -108,13 +107,13 @@ class MedalTVIE(InfoExtractor): 'url': item_url, id_key: item_id, 'width': width, - 'height': height + 'height': height, }) formats = [] thumbnails = [] for k, v in clip.items(): - if not (v and isinstance(v, compat_str)): + if not (v and isinstance(v, str)): continue mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k) if not mobj: @@ -136,7 +135,7 @@ class MedalTVIE(InfoExtractor): expected=True, video_id=video_id) else: self.raise_no_formats( - 'An unknown error occurred ({0}).'.format(error), + f'An unknown error occurred ({error}).', video_id=video_id) # Necessary because the id of the author is not known in advance. diff --git a/yt_dlp/extractor/mediaite.py b/yt_dlp/extractor/mediaite.py index 32887cb..b3fa6a1 100644 --- a/yt_dlp/extractor/mediaite.py +++ b/yt_dlp/extractor/mediaite.py @@ -15,7 +15,7 @@ class MediaiteIE(InfoExtractor): 'timestamp': 1631630185, 'upload_date': '20210914', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.mediaite.com/tv/joe-scarborough-goes-off-on-tax-breaks-for-super-wealthy-largest-income-redistribution-scam-in-american-history/', 'info_dict': { @@ -28,7 +28,7 @@ class MediaiteIE(InfoExtractor): 'timestamp': 1631618057, 'upload_date': '20210914', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.mediaite.com/politics/watch-rudy-giuliani-impersonates-queen-elizabeth-calls-mark-milley-an-asshle-in-bizarre-9-11-speech/', 'info_dict': { @@ -41,7 +41,7 @@ class MediaiteIE(InfoExtractor): 'timestamp': 1631536476, 'upload_date': '20210913', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.mediaite.com/podcasts/clarissa-ward-says-she-decided-to-become-a-journalist-on-9-11/', 'info_dict': { @@ -54,7 +54,7 @@ 
class MediaiteIE(InfoExtractor): 'timestamp': 1631311188, 'upload_date': '20210910', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.mediaite.com/opinion/mainstream-media-ignores-rose-mcgowans-bombshell-allegation-that-newsoms-wife-tried-to-silence-her-on-weinstein/', 'info_dict': { @@ -67,7 +67,7 @@ class MediaiteIE(InfoExtractor): 'timestamp': 1631553328, 'upload_date': '20210913', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.mediaite.com/news/watch-cnbcs-jim-cramer-says-nobody-wants-to-die-getting-infected-by-unvaccinated-coworker-even-for-22-an-hour/', 'info_dict': { @@ -80,7 +80,7 @@ class MediaiteIE(InfoExtractor): 'timestamp': 1633014214, 'upload_date': '20210930', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.mediaite.com/politics/i-cant-read-it-fast-enough-while-defending-trump-larry-kudlow-overwhelmed-by-volume-of-ex-presidents-legal-troubles/', 'info_dict': { @@ -93,7 +93,7 @@ class MediaiteIE(InfoExtractor): 'timestamp': 1691015535, 'upload_date': '20230802', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index c015977..bd1a27f 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_str, compat_urllib_parse_unquote from ..utils import ( ExtractorError, traverse_obj, @@ -22,7 +23,7 @@ class MediaKlikkIE(InfoExtractor): 'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig', 'ext': 'mp4', 'upload_date': '20210901', - 'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg' + 'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg', }, 'skip': 
'Webpage redirects to 404 page', }, { @@ -34,8 +35,8 @@ class MediaKlikkIE(InfoExtractor): 'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja', 'ext': 'mp4', 'upload_date': '20230903', - 'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg' - } + 'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg', + }, }, { # (old) m4sport 'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/', @@ -44,7 +45,7 @@ class MediaKlikkIE(InfoExtractor): 'title': 'Gyémánt Liga, Párizs', 'ext': 'mp4', 'upload_date': '20210830', - 'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg' + 'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg', }, 'skip': 'Webpage redirects to 404 page', }, { @@ -56,8 +57,8 @@ class MediaKlikkIE(InfoExtractor): 'display_id': 'atletika-gyemant-liga-brusszel', 'ext': 'mp4', 'upload_date': '20230908', - 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg' - } + 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg', + }, }, { # m4sport with *video/ url and no date 'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/', @@ -66,8 +67,8 @@ class MediaKlikkIE(InfoExtractor): 'title': 'Real Madrid - Chelsea 1-1', 'display_id': 'real-madrid-chelsea-1-1', 'ext': 'mp4', - 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png' - } + 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png', + }, }, { # (old) hirado 'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/', @@ -75,7 +76,7 @@ class MediaKlikkIE(InfoExtractor): 'id': '4760120', 'title': 'Feltételeket szabott a főváros', 'ext': 'mp4', - 'thumbnail': 
'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg' + 'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg', }, 'skip': 'Webpage redirects to video list page', }, { @@ -87,8 +88,8 @@ class MediaKlikkIE(InfoExtractor): 'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal', 'ext': 'mp4', 'upload_date': '20230911', - 'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg' - } + 'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg', + }, }, { # (old) petofilive 'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/', @@ -97,7 +98,7 @@ class MediaKlikkIE(InfoExtractor): 'title': 'Tha Shudras az Akusztikban', 'ext': 'mp4', 'upload_date': '20210607', - 'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg' + 'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg', }, 'skip': 'Webpage redirects to empty page', }, { @@ -109,8 +110,8 @@ class MediaKlikkIE(InfoExtractor): 'display_id': 'futball-fesztival-a-margitszigeten', 'ext': 'mp4', 'upload_date': '20230909', - 'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg' - } + 'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg', + }, }] def _real_extract(self, url): @@ -120,13 +121,13 @@ class MediaKlikkIE(InfoExtractor): player_data_str = self._html_search_regex( r'mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);', webpage, 'player data') - player_data = self._parse_json(player_data_str, display_id, compat_urllib_parse_unquote) - video_id = compat_str(player_data['contentId']) + player_data = self._parse_json(player_data_str, display_id, urllib.parse.unquote) + video_id = 
str(player_data['contentId']) title = player_data.get('title') or self._og_search_title(webpage, fatal=False) or \ self._html_search_regex(r'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage, 'title') upload_date = unified_strdate( - '%s-%s-%s' % (mobj.group('year'), mobj.group('month'), mobj.group('day'))) + '{}-{}-{}'.format(mobj.group('year'), mobj.group('month'), mobj.group('day'))) if not upload_date: upload_date = unified_strdate(self._html_search_regex( r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None)) @@ -149,5 +150,5 @@ class MediaKlikkIE(InfoExtractor): 'display_id': display_id, 'formats': formats, 'upload_date': upload_date, - 'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage) + 'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage), } diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index b7df5c7..8cb18e6 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -29,7 +29,7 @@ class MediasetIE(ThePlatformBaseIE): ''' _EMBED_REGEX = [ - rf'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:\w+\.)+mediaset\.it/player/(?:v\d+/)?index\.html\?\S*?programGuid={_GUID_RE})[\'"&]' + rf'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//(?:\w+\.)+mediaset\.it/player/(?:v\d+/)?index\.html\?\S*?programGuid={_GUID_RE})[\'"&]', ] _TESTS = [{ # full episode @@ -154,14 +154,14 @@ class MediasetIE(ThePlatformBaseIE): }, 'params': { 'skip_download': True, - } + }, }] def _parse_smil_formats_and_subtitles( self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): for video in smil.findall(self._xpath_ns('.//video', namespace)): video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src']) - return super(MediasetIE, self)._parse_smil_formats_and_subtitles( + return super()._parse_smil_formats_and_subtitles( smil, smil_url, video_id, namespace, f4m_params, 
transform_rtmp_url) def _check_drm_formats(self, tp_formats, video_id): diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index d3fec4e..ad7ab27 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -1,11 +1,8 @@ import json import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..utils import ( ExtractorError, float_or_none, @@ -18,13 +15,14 @@ from ..utils import ( url_or_none, urljoin, ) +from ..utils.traversal import traverse_obj _ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})' class MediasiteIE(InfoExtractor): - _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/[^/#?]+/Presentation)/(?P<id>%s)(?P<query>\?[^#]+|)' % _ID_RE - _EMBED_REGEX = [r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE] + _VALID_URL = rf'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/[^/#?]+/Presentation)/(?P<id>{_ID_RE})(?P<query>\?[^#]+|)' + _EMBED_REGEX = [rf'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/{_ID_RE}(?:\?.*?)?)\1'] _TESTS = [ { 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d', @@ -86,7 +84,7 @@ class MediasiteIE(InfoExtractor): 'upload_date': '20120409', 'timestamp': 1333983600, 'duration': 7794, - } + }, }, { 'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d', @@ -100,7 +98,7 @@ class MediasiteIE(InfoExtractor): # dashed id 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271-681e-4f19-9af3-c60d1f82869b1d', 'only_matching': True, - } + }, ] # look in Mediasite.Core.js (Mediasite.ContentStreamType[*]) @@ -117,16 +115,16 @@ class MediasiteIE(InfoExtractor): for embed_url in super()._extract_embed_urls(url, webpage): yield smuggle_url(embed_url, {'UrlReferrer': url}) - def 
__extract_slides(self, *, stream_id, snum, Stream, duration, images): - slide_base_url = Stream['SlideBaseUrl'] + def __extract_slides(self, *, stream_id, snum, stream, duration, images): + slide_base_url = stream['SlideBaseUrl'] - fname_template = Stream['SlideImageFileNameTemplate'] + fname_template = stream['SlideImageFileNameTemplate'] if fname_template != 'slide_{0:D4}.jpg': self.report_warning('Unusual slide file name template; report a bug if slide downloading fails') fname_template = re.sub(r'\{0:D([0-9]+)\}', r'{0:0\1}', fname_template) fragments = [] - for i, slide in enumerate(Stream['Slides']): + for i, slide in enumerate(stream['Slides']): if i == 0: if slide['Time'] > 0: default_slide = images.get('DefaultSlide') @@ -141,18 +139,18 @@ class MediasiteIE(InfoExtractor): }) next_time = try_call( - lambda: Stream['Slides'][i + 1]['Time'], + lambda: stream['Slides'][i + 1]['Time'], lambda: duration, lambda: slide['Time'], expected_type=(int, float)) fragments.append({ 'path': fname_template.format(slide.get('Number', i + 1)), - 'duration': (next_time - slide['Time']) / 1000 + 'duration': (next_time - slide['Time']) / 1000, }) return { - 'format_id': '%s-%u.slides' % (stream_id, snum), + 'format_id': f'{stream_id}-{snum}.slides', 'ext': 'mhtml', 'url': slide_base_url, 'protocol': 'mhtml', @@ -173,12 +171,12 @@ class MediasiteIE(InfoExtractor): redirect_url = urlh.url # XXX: might have also extracted UrlReferrer and QueryString from the html - service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex( + service_path = urllib.parse.urljoin(redirect_url, self._html_search_regex( r'<div[^>]+\bid=["\']ServicePath[^>]+>(.+?)</div>', webpage, resource_id, default='/Mediasite/PlayerService/PlayerService.svc/json')) player_options = self._download_json( - '%s/GetPlayerOptions' % service_path, resource_id, + f'{service_path}/GetPlayerOptions', resource_id, headers={ 'Content-type': 'application/json; charset=utf-8', 'X-Requested-With': 
'XMLHttpRequest', @@ -189,25 +187,25 @@ class MediasiteIE(InfoExtractor): 'QueryString': query, 'UrlReferrer': data.get('UrlReferrer', ''), 'UseScreenReader': False, - } - }).encode('utf-8'))['d'] + }, + }).encode())['d'] presentation = player_options['Presentation'] title = presentation['Title'] if presentation is None: raise ExtractorError( - 'Mediasite says: %s' % player_options['PlayerPresentationStatusMessage'], + 'Mediasite says: {}'.format(player_options['PlayerPresentationStatusMessage']), expected=True) thumbnails = [] formats = [] - for snum, Stream in enumerate(presentation['Streams']): - stream_type = Stream.get('StreamType') + for snum, stream in enumerate(presentation['Streams']): + stream_type = stream.get('StreamType') if stream_type is None: continue - video_urls = Stream.get('VideoUrls') + video_urls = stream.get('VideoUrls') if not isinstance(video_urls, list): video_urls = [] @@ -215,36 +213,42 @@ class MediasiteIE(InfoExtractor): stream_type, 'type%u' % stream_type) stream_formats = [] - for unum, VideoUrl in enumerate(video_urls): - video_url = url_or_none(VideoUrl.get('Location')) + for unum, video in enumerate(video_urls): + video_url = url_or_none(video.get('Location')) if not video_url: continue # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS - media_type = VideoUrl.get('MediaType') + media_type = video.get('MediaType') + ext = mimetype2ext(video.get('MimeType')) if media_type == 'SS': stream_formats.extend(self._extract_ism_formats( video_url, resource_id, - ism_id='%s-%u.%u' % (stream_id, snum, unum), + ism_id=f'{stream_id}-{snum}.{unum}', fatal=False)) elif media_type == 'Dash': stream_formats.extend(self._extract_mpd_formats( video_url, resource_id, - mpd_id='%s-%u.%u' % (stream_id, snum, unum), + mpd_id=f'{stream_id}-{snum}.{unum}', + fatal=False)) + elif ext in ('m3u', 'm3u8'): + stream_formats.extend(self._extract_m3u8_formats( + video_url, resource_id, + m3u8_id=f'{stream_id}-{snum}.{unum}', 
fatal=False)) else: stream_formats.append({ - 'format_id': '%s-%u.%u' % (stream_id, snum, unum), + 'format_id': f'{stream_id}-{snum}.{unum}', 'url': video_url, - 'ext': mimetype2ext(VideoUrl.get('MimeType')), + 'ext': ext, }) - if Stream.get('HasSlideContent', False): - images = player_options['PlayerLayoutOptions']['Images'] + images = traverse_obj(player_options, ('PlayerLayoutOptions', 'Images', {dict})) + if stream.get('HasSlideContent') and images: stream_formats.append(self.__extract_slides( stream_id=stream_id, snum=snum, - Stream=Stream, + stream=stream, duration=presentation.get('Duration'), images=images, )) @@ -254,10 +258,10 @@ class MediasiteIE(InfoExtractor): for fmt in stream_formats: fmt['quality'] = -10 - thumbnail_url = Stream.get('ThumbnailUrl') + thumbnail_url = stream.get('ThumbnailUrl') if thumbnail_url: thumbnails.append({ - 'id': '%s-%u' % (stream_id, snum), + 'id': f'{stream_id}-{snum}', 'url': urljoin(redirect_url, thumbnail_url), 'preference': -1 if stream_type != 0 else 0, }) @@ -278,15 +282,15 @@ class MediasiteIE(InfoExtractor): class MediasiteCatalogIE(InfoExtractor): - _VALID_URL = r'''(?xi) + _VALID_URL = rf'''(?xi) (?P<url>https?://[^/]+/Mediasite) /Catalog/Full/ - (?P<catalog_id>{0}) + (?P<catalog_id>{_ID_RE}) (?: - /(?P<current_folder_id>{0}) - /(?P<root_dynamic_folder_id>{0}) + /(?P<current_folder_id>{_ID_RE}) + /(?P<root_dynamic_folder_id>{_ID_RE}) )? 
- '''.format(_ID_RE) + ''' _TESTS = [{ 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48530d454381549f955d08c75e21', 'info_dict': { @@ -368,7 +372,7 @@ class MediasiteCatalogIE(InfoExtractor): headers[anti_forgery_header] = anti_forgery_token catalog = self._download_json( - '%s/Catalog/Data/GetPresentationsForFolder' % mediasite_url, + f'{mediasite_url}/Catalog/Data/GetPresentationsForFolder', catalog_id, data=json.dumps(data).encode(), headers=headers) entries = [] @@ -379,13 +383,13 @@ class MediasiteCatalogIE(InfoExtractor): if not video_id: continue entries.append(self.url_result( - '%s/Play/%s' % (mediasite_url, video_id), + f'{mediasite_url}/Play/{video_id}', ie=MediasiteIE.ie_key(), video_id=video_id)) title = try_get( - catalog, lambda x: x['CurrentFolder']['Name'], compat_str) + catalog, lambda x: x['CurrentFolder']['Name'], str) - return self.playlist_result(entries, catalog_id, title,) + return self.playlist_result(entries, catalog_id, title) class MediasiteNamedCatalogIE(InfoExtractor): @@ -403,8 +407,8 @@ class MediasiteNamedCatalogIE(InfoExtractor): webpage = self._download_webpage(url, catalog_name) catalog_id = self._search_regex( - r'CatalogId\s*:\s*["\'](%s)' % _ID_RE, webpage, 'catalog id') + rf'CatalogId\s*:\s*["\']({_ID_RE})', webpage, 'catalog id') return self.url_result( - '%s/Catalog/Full/%s' % (mediasite_url, catalog_id), + f'{mediasite_url}/Catalog/Full/{catalog_id}', ie=MediasiteCatalogIE.ie_key(), video_id=catalog_id) diff --git a/yt_dlp/extractor/mediaworksnz.py b/yt_dlp/extractor/mediaworksnz.py index 62e37d2..be67b63 100644 --- a/yt_dlp/extractor/mediaworksnz.py +++ b/yt_dlp/extractor/mediaworksnz.py @@ -24,8 +24,8 @@ class MediaWorksNZVODIE(InfoExtractor): 'timestamp': 1604268608, 'upload_date': '20201101', 'thumbnail': r're:^https?://.*\.jpg$', - 'channel': 'George FM' - } + 'channel': 'George FM', + }, }, { # has audio-only format 'url': 'https://vodupload-api.mediaworks.nz/library/asset/published/VID02627', @@ 
-40,7 +40,7 @@ class MediaWorksNZVODIE(InfoExtractor): 'upload_date': '20220822', 'timestamp': 1661152289, }, - 'params': {'format': 'ba[ext=mp3]'} + 'params': {'format': 'ba[ext=mp3]'}, }] _WEBPAGE_TESTS = [{ @@ -55,7 +55,7 @@ class MediaWorksNZVODIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'Socrates Walks Into A Bar Podcast Episode 1', 'upload_date': '20220720', - } + }, }] @classmethod @@ -63,7 +63,7 @@ class MediaWorksNZVODIE(InfoExtractor): for mobj in re.finditer( rf'''(?x)<div\s+\bid=["']Player-Attributes-JWID[^>]+\b data-request-url=["']{cls._VALID_URL_BASE_RE}["'][^>]+\b - data-asset-id=["']{cls._VALID_URL_ID_RE}["']''', webpage + data-asset-id=["']{cls._VALID_URL_ID_RE}["']''', webpage, ): yield f'https://vodupload-api.mediaworks.nz/library/asset/published/{mobj.group("id")}' diff --git a/yt_dlp/extractor/meipai.py b/yt_dlp/extractor/meipai.py index 1a6f3cd..e4c145c 100644 --- a/yt_dlp/extractor/meipai.py +++ b/yt_dlp/extractor/meipai.py @@ -25,7 +25,7 @@ class MeipaiIE(InfoExtractor): 'view_count': 35511, 'creator': '她她-TATA', 'tags': ['葉子', '阿桑', '余姿昀', '超級女聲'], - } + }, }, { # record of live streaming 'url': 'http://www.meipai.com/media/585526361', @@ -41,7 +41,7 @@ class MeipaiIE(InfoExtractor): 'upload_date': '20160919', 'view_count': 1215, 'creator': '她她-TATA', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/melonvod.py b/yt_dlp/extractor/melonvod.py index 1d3fff8..05d9de8 100644 --- a/yt_dlp/extractor/melonvod.py +++ b/yt_dlp/extractor/melonvod.py @@ -20,7 +20,7 @@ class MelonVODIE(InfoExtractor): }, 'params': { 'skip_download': 'm3u8 download', - } + }, } def _real_extract(self, url): @@ -64,5 +64,5 @@ class MelonVODIE(InfoExtractor): 'thumbnail': thumbnail, 'upload_date': upload_date, 'duration': duration, - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/metacritic.py b/yt_dlp/extractor/metacritic.py index 1441054..41e20a5 100644 --- a/yt_dlp/extractor/metacritic.py 
+++ b/yt_dlp/extractor/metacritic.py @@ -40,9 +40,9 @@ class MetacriticIE(InfoExtractor): clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id) formats = [] - for videoFile in clip.findall('httpURI/videoFile'): - rate_str = videoFile.find('rate').text - video_url = videoFile.find('filePath').text + for video_file in clip.findall('httpURI/videoFile'): + rate_str = video_file.find('rate').text + video_url = video_file.find('filePath').text formats.append({ 'url': video_url, 'ext': 'mp4', diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py index 31ccf00..d5dda06 100644 --- a/yt_dlp/extractor/mgtv.py +++ b/yt_dlp/extractor/mgtv.py @@ -77,13 +77,13 @@ class MGTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) tk2 = base64.urlsafe_b64encode( - f'did={str(uuid.uuid4())}|pno=1030|ver=0.3.0301|clit={int(time.time())}'.encode())[::-1] + f'did={uuid.uuid4()}|pno=1030|ver=0.3.0301|clit={int(time.time())}'.encode())[::-1] try: api_data = self._download_json( 'https://pcweb.api.mgtv.com/player/video', video_id, query={ 'tk2': tk2, 'video_id': video_id, - 'type': 'pch5' + 'type': 'pch5', }, headers=self.geo_verification_headers())['data'] except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 401: @@ -160,6 +160,6 @@ class MGTVIE(InfoExtractor): subtitles.setdefault(locale.lower(), []).append({ 'url': sub_url, 'name': sub.get('name'), - 'ext': 'srt' + 'ext': 'srt', }) return subtitles diff --git a/yt_dlp/extractor/microsoftembed.py b/yt_dlp/extractor/microsoftembed.py index f71ab3e..d0135f5 100644 --- a/yt_dlp/extractor/microsoftembed.py +++ b/yt_dlp/extractor/microsoftembed.py @@ -1,5 +1,14 @@ +import re + from .common import InfoExtractor -from ..utils import int_or_none, traverse_obj, unified_timestamp +from ..utils import ( + int_or_none, + parse_iso8601, + traverse_obj, + unified_timestamp, + url_basename, + url_or_none, +) class MicrosoftEmbedIE(InfoExtractor): @@ -15,8 
+24,8 @@ class MicrosoftEmbedIE(InfoExtractor): 'thumbnail': 'http://img-prod-cms-rt-microsoft-com.akamaized.net/cms/api/am/imageFileData/RWL7Ju?ver=cae5', 'age_limit': 0, 'timestamp': 1631658316, - 'upload_date': '20210914' - } + 'upload_date': '20210914', + }, }] _API_URL = 'https://prod-video-cms-rt-microsoft-com.akamaized.net/vhs/api/videos/' @@ -63,3 +72,250 @@ class MicrosoftEmbedIE(InfoExtractor): 'subtitles': subtitles, 'thumbnails': thumbnails, } + + +class MicrosoftMediusBaseIE(InfoExtractor): + @staticmethod + def _sub_to_dict(subtitle_list): + subtitles = {} + for sub in subtitle_list: + subtitles.setdefault(sub.pop('tag', 'und'), []).append(sub) + return subtitles + + def _extract_ism(self, ism_url, video_id): + formats = self._extract_ism_formats(ism_url, video_id) + for fmt in formats: + if fmt['language'] != 'eng' and 'English' not in fmt['format_id']: + fmt['language_preference'] = -10 + return formats + + +class MicrosoftMediusIE(MicrosoftMediusBaseIE): + _VALID_URL = r'https?://medius\.microsoft\.com/Embed/(?:Video\?id=|video-nc/|VideoDetails/)(?P<id>[\da-f-]+)' + + _TESTS = [{ + 'url': 'https://medius.microsoft.com/Embed/video-nc/9640d86c-f513-4889-959e-5dace86e7d2b', + 'info_dict': { + 'id': '9640d86c-f513-4889-959e-5dace86e7d2b', + 'ext': 'ismv', + 'title': 'Rapidly code, test and ship from secure cloud developer environments', + 'description': 'md5:33c8e4facadc438613476eea24165f71', + 'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*', + 'subtitles': 'count:30', + }, + }, { + 'url': 'https://medius.microsoft.com/Embed/video-nc/81215af5-c813-4dcd-aede-94f4e1a7daa3', + 'info_dict': { + 'id': '81215af5-c813-4dcd-aede-94f4e1a7daa3', + 'ext': 'ismv', + 'title': 'Microsoft Build opening', + 'description': 'md5:43455096141077a1f23144cab8cec1cb', + 'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*', + 'subtitles': 'count:31', + }, + }, { + 'url': 
'https://medius.microsoft.com/Embed/VideoDetails/78493569-9b3b-4a85-a409-ee76e789e25c', + 'info_dict': { + 'id': '78493569-9b3b-4a85-a409-ee76e789e25c', + 'ext': 'ismv', + 'title': ' Anomaly Detection & Root cause at Edge', + 'description': 'md5:f8f1ad93d7918649bfb97fa081b03b83', + 'thumbnail': r're:https://mediusdownload.event.microsoft.com/asset.*\.jpg.*', + 'subtitles': 'count:17', + }, + }, { + 'url': 'https://medius.microsoft.com/Embed/Video?id=0dc69bda-079b-4070-a7db-a8da1a06a9c7', + 'only_matching': True, + }, { + 'url': 'https://medius.microsoft.com/Embed/video-nc/fe823a91-959c-465b-96d4-8f4db624f72c', + 'only_matching': True, + }] + + def _extract_subtitle(self, webpage, video_id): + captions = traverse_obj( + self._search_json(r'const\s+captionsConfiguration\s*=', webpage, 'captions', video_id, default=None), + ('languageList', lambda _, v: url_or_none(v['src']), { + 'url': 'src', + 'tag': ('srclang', {str}), + 'name': ('kind', {str}), + })) or [{'url': url, 'tag': url_basename(url).split('.vtt')[0].split('_')[-1]} + for url in re.findall(r'var\s+file\s+=\s+\{[^}]+\'(https://[^\']+\.vtt\?[^\']+)', webpage)] + + return self._sub_to_dict(captions) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(f'https://medius.microsoft.com/Embed/video-nc/{video_id}', video_id) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'formats': self._extract_ism( + self._search_regex(r'StreamUrl\s*=\s*"([^"]+manifest)"', webpage, 'ism url'), video_id), + 'thumbnail': self._og_search_thumbnail(webpage), + 'subtitles': self._extract_subtitle(webpage, video_id), + } + + +class MicrosoftLearnPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?(?P<type>shows|events)/(?P<id>[\w-]+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners', + 'info_dict': { + 'id': 
'bash-for-beginners', + 'title': 'Bash for Beginners', + 'description': 'md5:16a91c07222117d1e00912f0dbc02c2c', + }, + 'playlist_count': 20, + }, { + 'url': 'https://learn.microsoft.com/en-us/events/build-2022', + 'info_dict': { + 'id': 'build-2022', + 'title': 'Microsoft Build 2022 - Events', + 'description': 'md5:c16b43848027df837b22c6fbac7648d3', + }, + 'playlist_count': 201, + }] + + def _entries(self, url_base, video_id): + skip = 0 + while True: + playlist_info = self._download_json(url_base, video_id, f'Downloading entries {skip}', query={ + 'locale': 'en-us', + '$skip': skip, + }) + url_paths = traverse_obj(playlist_info, ('results', ..., 'url', {str})) + for url_path in url_paths: + yield self.url_result(f'https://learn.microsoft.com/en-us{url_path}') + skip += len(url_paths) + if skip >= playlist_info.get('count', 0) or not url_paths: + break + + def _real_extract(self, url): + playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type') + webpage = self._download_webpage(url, playlist_id) + + metainfo = { + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + } + sub_type = 'episodes' if playlist_type == 'shows' else 'sessions' + + url_base = f'https://learn.microsoft.com/api/contentbrowser/search/{playlist_type}/{playlist_id}/{sub_type}' + return self.playlist_result(self._entries(url_base, playlist_id), playlist_id, **metainfo) + + +class MicrosoftLearnEpisodeIE(MicrosoftMediusBaseIE): + _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?shows/[\w-]+/(?P<id>[^?#/]+)' + _TESTS = [{ + 'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners/what-is-the-difference-between-a-terminal-and-a-shell-2-of-20-bash-for-beginners/', + 'info_dict': { + 'id': 'd44e1a03-a0e5-45c2-9496-5c9fa08dc94c', + 'ext': 'ismv', + 'title': 'What is the Difference Between a Terminal and a Shell? 
(Part 2 of 20)', + 'description': 'md5:7bbbfb593d21c2cf2babc3715ade6b88', + 'timestamp': 1676339547, + 'upload_date': '20230214', + 'thumbnail': r're:https://learn\.microsoft\.com/video/media/.*\.png', + 'subtitles': 'count:14', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + entry_id = self._html_search_meta('entryId', webpage, 'entryId', fatal=True) + video_info = self._download_json( + f'https://learn.microsoft.com/api/video/public/v1/entries/{entry_id}', video_id) + return { + 'id': entry_id, + 'formats': self._extract_ism(video_info['publicVideo']['adaptiveVideoUrl'], video_id), + 'subtitles': self._sub_to_dict(traverse_obj(video_info, ( + 'publicVideo', 'captions', lambda _, v: url_or_none(v['url']), { + 'tag': ('language', {str}), + 'url': 'url', + }))), + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + **traverse_obj(video_info, { + 'timestamp': ('createTime', {parse_iso8601}), + 'thumbnails': ('publicVideo', 'thumbnailOtherSizes', ..., {'url': {url_or_none}}), + }), + } + + +class MicrosoftLearnSessionIE(InfoExtractor): + _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?events/[\w-]+/(?P<id>[^?#/]+)' + _TESTS = [{ + 'url': 'https://learn.microsoft.com/en-us/events/build-2022/ts01-rapidly-code-test-ship-from-secure-cloud-developer-environments', + 'info_dict': { + 'id': '9640d86c-f513-4889-959e-5dace86e7d2b', + 'ext': 'ismv', + 'title': 'Rapidly code, test and ship from secure cloud developer environments - Events', + 'description': 'md5:f26c1a85d41c1cffd27a0279254a25c3', + 'timestamp': 1653408600, + 'upload_date': '20220524', + 'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + metainfo = { + 'title': self._og_search_title(webpage), + 'description': 
self._og_search_description(webpage), + 'timestamp': parse_iso8601(self._html_search_meta('startDate', webpage, 'startDate')), + } + + return self.url_result( + self._html_search_meta('externalVideoUrl', webpage, 'videoUrl', fatal=True), + url_transparent=True, ie=MicrosoftMediusIE, **metainfo) + + +class MicrosoftBuildIE(InfoExtractor): + _VALID_URL = [ + r'https?://build\.microsoft\.com/[\w-]+/sessions/(?P<id>[\da-f-]+)', + r'https?://build\.microsoft\.com/[\w-]+/(?P<id>sessions)/?(?:[?#]|$)', + ] + + _TESTS = [{ + 'url': 'https://build.microsoft.com/en-US/sessions/b49feb31-afcd-4217-a538-d3ca1d171198?source=sessions', + 'info_dict': { + 'id': 'aee55fb5-fcf9-4b38-b764-a3527cb57554', + 'ext': 'ismv', + 'title': 'Microsoft Build opening keynote', + 'description': 'md5:d38338f336ef4b6ef9ad2a7466a76655', + 'timestamp': 1716307200, + 'upload_date': '20240521', + 'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*', + }, + }, { + 'url': 'https://build.microsoft.com/en-US/sessions', + 'info_dict': { + 'id': 'sessions', + }, + 'playlist_mincount': 418, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + entries = [ + self.url_result( + video_info['onDemand'], ie=MicrosoftMediusIE, url_transparent=True, **traverse_obj(video_info, { + 'id': ('sessionId', {str}), + 'title': ('title', {str}), + 'description': ('description', {str}), + 'timestamp': ('startDateTime', {parse_iso8601}), + })) + for video_info in self._download_json( + 'https://api-v2.build.microsoft.com/api/session/all/en-US', video_id, 'Downloading video info') + ] + if video_id == 'sessions': + return self.playlist_result(entries, video_id) + else: + return traverse_obj(entries, (lambda _, v: v['id'] == video_id), get_all=False) diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py index f6a0b41..b138810 100644 --- a/yt_dlp/extractor/microsoftstream.py +++ b/yt_dlp/extractor/microsoftstream.py @@ -37,11 +37,11 @@ class 
MicrosoftStreamIE(InfoExtractor): sub_dict = automatic_captions if track.get('autoGenerated') else subtitles sub_dict.setdefault(track['language'], []).append({ 'ext': 'vtt', - 'url': track.get('url') + 'url': track.get('url'), }) return { 'subtitles': subtitles, - 'automatic_captions': automatic_captions + 'automatic_captions': automatic_captions, } def extract_all_subtitles(self, *args, **kwargs): @@ -66,7 +66,7 @@ class MicrosoftStreamIE(InfoExtractor): f'{api_url}/videos/{video_id}', video_id, headers=headers, query={ '$expand': 'creator,tokens,status,liveEvent,extensions', - 'api-version': '1.4-private' + 'api-version': '1.4-private', }) video_id = video_data.get('id') or video_id language = video_data.get('language') diff --git a/yt_dlp/extractor/microsoftvirtualacademy.py b/yt_dlp/extractor/microsoftvirtualacademy.py deleted file mode 100644 index b759b18..0000000 --- a/yt_dlp/extractor/microsoftvirtualacademy.py +++ /dev/null @@ -1,189 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - smuggle_url, - unsmuggle_url, - xpath_text, -) - - -class MicrosoftVirtualAcademyBaseIE(InfoExtractor): - def _extract_base_url(self, course_id, display_id): - return self._download_json( - 'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id, - display_id, 'Downloading course base URL') - - def _extract_chapter_and_title(self, title): - if not title: - return None, None - m = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title) - return (int(m.group('chapter')), m.group('title')) if m else (None, title) - - -class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): - IE_NAME = 'mva' - IE_DESC = 'Microsoft Virtual Academy videos' - _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME - - _TESTS = [{ - 'url': 
'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382', - 'md5': '7826c44fc31678b12ad8db11f6b5abb9', - 'info_dict': { - 'id': 'gfVXISmEB_6804984382', - 'ext': 'mp4', - 'title': 'Course Introduction', - 'formats': 'mincount:3', - 'subtitles': { - 'en': [{ - 'ext': 'ttml', - }], - }, - } - }, { - 'url': 'mva:11788:gfVXISmEB_6804984382', - 'only_matching': True, - }] - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - - mobj = self._match_valid_url(url) - course_id = mobj.group('course_id') - video_id = mobj.group('id') - - base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id) - - settings = self._download_xml( - '%s/content/content_%s/videosettings.xml?v=1' % (base_url, video_id), - video_id, 'Downloading video settings XML') - - _, title = self._extract_chapter_and_title(xpath_text( - settings, './/Title', 'title', fatal=True)) - - formats = [] - - for sources in settings.findall('.//MediaSources'): - sources_type = sources.get('videoType') - for source in sources.findall('./MediaSource'): - video_url = source.text - if not video_url or not video_url.startswith('http'): - continue - if sources_type == 'smoothstreaming': - formats.extend(self._extract_ism_formats( - video_url, video_id, 'mss', fatal=False)) - continue - video_mode = source.get('videoMode') - height = int_or_none(self._search_regex( - r'^(\d+)[pP]$', video_mode or '', 'height', default=None)) - codec = source.get('codec') - acodec, vcodec = [None] * 2 - if codec: - codecs = codec.split(',') - if len(codecs) == 2: - acodec, vcodec = codecs - elif len(codecs) == 1: - vcodec = codecs[0] - formats.append({ - 'url': video_url, - 'format_id': video_mode, - 'height': height, - 'acodec': acodec, - 'vcodec': vcodec, - }) - - subtitles = {} - for source in settings.findall('.//MarkerResourceSource'): - subtitle_url = source.text - if not subtitle_url: - continue - 
subtitles.setdefault('en', []).append({ - 'url': '%s/%s' % (base_url, subtitle_url), - 'ext': source.get('type'), - }) - - return { - 'id': video_id, - 'title': title, - 'subtitles': subtitles, - 'formats': formats - } - - -class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE): - IE_NAME = 'mva:course' - IE_DESC = 'Microsoft Virtual Academy courses' - _VALID_URL = r'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME - - _TESTS = [{ - 'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788', - 'info_dict': { - 'id': '11788', - 'title': 'Microsoft Azure Fundamentals: Virtual Machines', - }, - 'playlist_count': 36, - }, { - # with emphasized chapters - 'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335', - 'info_dict': { - 'id': '16335', - 'title': 'Developing Windows 10 Games with Construct 2', - }, - 'playlist_count': 10, - }, { - 'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788', - 'only_matching': True, - }, { - 'url': 'mva:course:11788', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return False if MicrosoftVirtualAcademyIE.suitable(url) else super( - MicrosoftVirtualAcademyCourseIE, cls).suitable(url) - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - course_id = mobj.group('id') - display_id = mobj.group('display_id') - - base_url = self._extract_base_url(course_id, display_id) - - manifest = self._download_json( - '%s/imsmanifestlite.json' % base_url, - display_id, 'Downloading course manifest JSON')['manifest'] - - organization = manifest['organizations']['organization'][0] - - entries = [] - for chapter in organization['item']: - chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title')) - 
chapter_id = chapter.get('@identifier') - for item in chapter.get('item', []): - item_id = item.get('@identifier') - if not item_id: - continue - metadata = item.get('resource', {}).get('metadata') or {} - if metadata.get('learningresourcetype') != 'Video': - continue - _, title = self._extract_chapter_and_title(item.get('title')) - duration = parse_duration(metadata.get('duration')) - description = metadata.get('description') - entries.append({ - '_type': 'url_transparent', - 'url': smuggle_url( - 'mva:%s:%s' % (course_id, item_id), {'base_url': base_url}), - 'title': title, - 'description': description, - 'duration': duration, - 'chapter': chapter_title, - 'chapter_number': chapter_number, - 'chapter_id': chapter_id, - }) - - title = organization.get('title') or manifest.get('metadata', {}).get('title') - - return self.playlist_result(entries, course_id, title) diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py index caf60c8..88a2b9e 100644 --- a/yt_dlp/extractor/mildom.py +++ b/yt_dlp/extractor/mildom.py @@ -18,7 +18,7 @@ class MildomBaseIE(InfoExtractor): def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None): if not self._GUEST_ID: - self._GUEST_ID = f'pc-gp-{str(uuid.uuid4())}' + self._GUEST_ID = f'pc-gp-{uuid.uuid4()}' content = self._download_json( url, video_id, note=note, data=json.dumps(body).encode() if body else None, @@ -150,18 +150,18 @@ class MildomVodIE(MildomBaseIE): 'protocol': 'm3u8_native', 'vcodec': 'none', 'acodec': 'aac', - 'ext': 'm4a' + 'ext': 'm4a', }] for fmt in autoplay['video_link']: formats.append({ - 'format_id': 'video-%s' % fmt['name'], + 'format_id': 'video-{}'.format(fmt['name']), 'url': fmt['url'], 'protocol': 'm3u8_native', 'width': fmt['level'] * autoplay['video_width'] // autoplay['video_height'], 'height': fmt['level'], 'vcodec': 'h264', 'acodec': 'aac', - 'ext': 'mp4' + 'ext': 'mp4', }) return { @@ -280,7 +280,7 @@ class MildomUserVodIE(MildomBaseIE): def 
_real_extract(self, url): user_id = self._match_id(url) - self.to_screen('This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/%s" instead' % user_id) + self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead') profile = self._call_api( 'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id, diff --git a/yt_dlp/extractor/minds.py b/yt_dlp/extractor/minds.py index 27a6e38..71c82f2 100644 --- a/yt_dlp/extractor/minds.py +++ b/yt_dlp/extractor/minds.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( clean_html, format_field, @@ -16,7 +15,7 @@ class MindsBaseIE(InfoExtractor): api_url = 'https://www.minds.com/api/' + path token = self._get_cookies(api_url).get('XSRF-TOKEN') return self._download_json( - api_url, video_id, 'Downloading %s JSON metadata' % resource, headers={ + api_url, video_id, f'Downloading {resource} JSON metadata', headers={ 'Referer': 'https://www.minds.com/', 'X-XSRF-TOKEN': token.value if token else '', }, query=query) @@ -98,7 +97,7 @@ class MindsIE(MindsBaseIE): uploader_id = owner.get('username') tags = entity.get('tags') - if tags and isinstance(tags, compat_str): + if tags and isinstance(tags, str): tags = [tags] thumbnail = None @@ -135,8 +134,8 @@ class MindsFeedBaseIE(MindsBaseIE): i = 1 while True: data = self._call_api( - 'v2/feeds/container/%s/videos' % feed_id, - feed_id, 'page %s' % i, query) + f'v2/feeds/container/{feed_id}/videos', + feed_id, f'page {i}', query) entities = data.get('entities') or [] for entity in entities: guid = entity.get('guid') @@ -153,7 +152,7 @@ class MindsFeedBaseIE(MindsBaseIE): def _real_extract(self, url): feed_id = self._match_id(url) feed = self._call_api( - 'v1/%s/%s' % (self._FEED_PATH, feed_id), + f'v1/{self._FEED_PATH}/{feed_id}', feed_id, self._FEED_TYPE)[self._FEED_TYPE] return 
self.playlist_result( diff --git a/yt_dlp/extractor/minoto.py b/yt_dlp/extractor/minoto.py index 032bf3b..6983256 100644 --- a/yt_dlp/extractor/minoto.py +++ b/yt_dlp/extractor/minoto.py @@ -12,7 +12,7 @@ class MinotoIE(InfoExtractor): mobj = self._match_valid_url(url) player_id = mobj.group('player_id') or '1' video_id = mobj.group('id') - video_data = self._download_json('http://play.minoto-video.com/%s/%s.js' % (player_id, video_id), video_id) + video_data = self._download_json(f'http://play.minoto-video.com/{player_id}/{video_id}.js', video_id) video_metadata = video_data['video-metadata'] formats = [] for fmt in video_data['video-files']: diff --git a/yt_dlp/extractor/mirrativ.py b/yt_dlp/extractor/mirrativ.py index 0a8ee0c..4e24371 100644 --- a/yt_dlp/extractor/mirrativ.py +++ b/yt_dlp/extractor/mirrativ.py @@ -11,7 +11,7 @@ class MirrativBaseIE(InfoExtractor): def assert_error(self, response): error_message = traverse_obj(response, ('status', 'error')) if error_message: - raise ExtractorError('Mirrativ says: %s' % error_message, expected=True) + raise ExtractorError(f'Mirrativ says: {error_message}', expected=True) class MirrativIE(MirrativBaseIE): @@ -42,7 +42,7 @@ class MirrativIE(MirrativBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://www.mirrativ.com/live/%s' % video_id, video_id) + webpage = self._download_webpage(f'https://www.mirrativ.com/live/{video_id}', video_id) live_response = self._download_json(f'https://www.mirrativ.com/api/live/live?live_id={video_id}', video_id) self.assert_error(live_response) @@ -102,7 +102,7 @@ class MirrativUserIE(MirrativBaseIE): # or the service will ban your IP address for a while continue live_id = live.get('live_id') - url = 'https://www.mirrativ.com/live/%s' % live_id + url = f'https://www.mirrativ.com/live/{live_id}' yield self.url_result(url, video_id=live_id, video_title=live.get('title')) page = api_response.get('next_page') diff --git 
a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py index 979584e..e75c540 100644 --- a/yt_dlp/extractor/mit.py +++ b/yt_dlp/extractor/mit.py @@ -28,7 +28,7 @@ class TechTVMITIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) raw_page = self._download_webpage( - 'http://techtv.mit.edu/videos/%s' % video_id, video_id) + f'http://techtv.mit.edu/videos/{video_id}', video_id) clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page) base_url = self._proto_relative_url(self._search_regex( @@ -79,7 +79,7 @@ class OCWMITIE(InfoExtractor): 'upload_date': '20121109', 'uploader_id': 'MIT', 'uploader': 'MIT OpenCourseWare', - } + }, }, { 'url': 'http://ocw.mit.edu/courses/mathematics/18-01sc-single-variable-calculus-fall-2010/1.-differentiation/part-a-definition-and-basic-rules/session-1-introduction-to-derivatives/', @@ -91,8 +91,8 @@ class OCWMITIE(InfoExtractor): 'uploader_id': 'MIT', 'uploader': 'MIT OpenCourseWare', 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.', - } - } + }, + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py index 58c4a23..9b7c7b8 100644 --- a/yt_dlp/extractor/mixch.py +++ b/yt_dlp/extractor/mixch.py @@ -83,7 +83,7 @@ class MixchArchiveIE(InfoExtractor): 'id': '421', 'ext': 'mp4', 'title': '96NEKO SHOW TIME', - } + }, }, { 'url': 'https://mixch.tv/archive/1213', 'skip': 'paid video, no DRM. 
expires at Dec 31, 2023', @@ -93,7 +93,7 @@ class MixchArchiveIE(InfoExtractor): 'title': '【特別トーク番組アーカイブス】Merm4id×燐舞曲 2nd LIVE「VERSUS」', 'release_date': '20231201', 'thumbnail': str, - } + }, }, { 'url': 'https://mixch.tv/archive/1214', 'only_matching': True, diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py index 8a95d1a..19b7fd4 100644 --- a/yt_dlp/extractor/mixcloud.py +++ b/yt_dlp/extractor/mixcloud.py @@ -1,12 +1,9 @@ +import base64 import itertools +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_ord, - compat_str, - compat_urllib_parse_unquote, -) +from ..compat import compat_ord from ..utils import ( ExtractorError, int_or_none, @@ -25,7 +22,7 @@ class MixcloudBaseIE(InfoExtractor): %s(lookup: {username: "%s"%s}) { %s } -}''' % (lookup_key, username, ', slug: "%s"' % slug if slug else '', object_fields) +}''' % (lookup_key, username, f', slug: "{slug}"' if slug else '', object_fields), # noqa: UP031 })['data'][lookup_key] @@ -91,8 +88,8 @@ class MixcloudIE(MixcloudBaseIE): def _real_extract(self, url): username, slug = self._match_valid_url(url).groups() - username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug) - track_id = '%s_%s' % (username, slug) + username, slug = urllib.parse.unquote(username), urllib.parse.unquote(slug) + track_id = f'{username}_{slug}' cloudcast = self._call_api('cloudcast', '''audioLength comments(first: 100) { @@ -162,7 +159,7 @@ class MixcloudIE(MixcloudBaseIE): if not format_url: continue decrypted = self._decrypt_xor_cipher( - self._DECRYPTION_KEY, compat_b64decode(format_url)) + self._DECRYPTION_KEY, base64.b64decode(format_url)) if url_key == 'hlsUrl': formats.extend(self._extract_m3u8_formats( decrypted, track_id, 'mp4', entry_protocol='m3u8_native', @@ -200,7 +197,7 @@ class MixcloudIE(MixcloudBaseIE): tags = [] for t in cloudcast.get('tags'): - tag = try_get(t, lambda x: x['tag']['name'], compat_str) + tag = 
try_get(t, lambda x: x['tag']['name'], str) if not tag: tags.append(tag) @@ -213,7 +210,7 @@ class MixcloudIE(MixcloudBaseIE): 'title': title, 'formats': formats, 'description': cloudcast.get('description'), - 'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str), + 'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], str), 'uploader': owner.get('displayName'), 'timestamp': parse_iso8601(cloudcast.get('publishDate')), 'uploader_id': owner.get('username'), @@ -238,12 +235,12 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE): def _real_extract(self, url): username, slug = self._match_valid_url(url).groups() - username = compat_urllib_parse_unquote(username) + username = urllib.parse.unquote(username) if not slug: slug = 'uploads' else: - slug = compat_urllib_parse_unquote(slug) - playlist_id = '%s_%s' % (username, slug) + slug = urllib.parse.unquote(slug) + playlist_id = f'{username}_{slug}' is_playlist_type = self._ROOT_TYPE == 'playlist' playlist_type = 'items' if is_playlist_type else slug @@ -265,7 +262,7 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE): endCursor hasNextPage } - }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE), + }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE), # noqa: UP031 playlist_id, username, slug if is_playlist_type else None) items = playlist.get(playlist_type) or {} @@ -274,15 +271,15 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE): cloudcast_url = cloudcast.get('url') if not cloudcast_url: continue - item_slug = try_get(cloudcast, lambda x: x['slug'], compat_str) - owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str) + item_slug = try_get(cloudcast, lambda x: x['slug'], str) + owner_username = try_get(cloudcast, lambda x: x['owner']['username'], str) video_id = f'{owner_username}_{item_slug}' if item_slug and owner_username else None entries.append(self.url_result( cloudcast_url, 
MixcloudIE.ie_key(), video_id)) page_info = items['pageInfo'] has_next_page = page_info['hasNextPage'] - list_filter = ', after: "%s"' % page_info['endCursor'] + list_filter = ', after: "{}"'.format(page_info['endCursor']) return self.playlist_result( entries, playlist_id, @@ -351,7 +348,7 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE): owner { username }''' def _get_playlist_title(self, title, slug): - return '%s (%s)' % (title, slug) + return f'{title} ({slug})' class MixcloudPlaylistIE(MixcloudPlaylistBaseIE): diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py index d715b97..6f67602 100644 --- a/yt_dlp/extractor/mlb.py +++ b/yt_dlp/extractor/mlb.py @@ -9,9 +9,10 @@ from ..utils import ( join_nonempty, parse_duration, parse_iso8601, - traverse_obj, try_get, + url_or_none, ) +from ..utils.traversal import traverse_obj class MLBBaseIE(InfoExtractor): @@ -203,7 +204,7 @@ class MLBIE(MLBBaseIE): def _download_video_data(self, display_id): return self._download_json( - 'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json' % display_id, + f'http://content.mlb.com/mlb/item/id/v1/{display_id}/details/web-v1.json', display_id) @@ -227,7 +228,7 @@ class MLBVideoIE(MLBBaseIE): @classmethod def suitable(cls, url): - return False if MLBIE.suitable(url) else super(MLBVideoIE, cls).suitable(url) + return False if MLBIE.suitable(url) else super().suitable(url) @staticmethod def _get_feed(video): @@ -268,7 +269,7 @@ class MLBVideoIE(MLBBaseIE): timestamp title } -}''' % display_id, +}''' % display_id, # noqa: UP031 })['data']['mediaPlayback'][0] @@ -300,14 +301,14 @@ class MLBTVIE(InfoExtractor): 'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None, headers={ 'User-Agent': 'okhttp/3.12.1', - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }, data=data.encode())['access_token'] entitlement = self._download_webpage( - 
f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={str(uuid.uuid4())}', None, + f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None, headers={ 'User-Agent': 'okhttp/3.12.1', - 'Authorization': f'Bearer {access_token}' + 'Authorization': f'Bearer {access_token}', }) data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv' @@ -316,7 +317,7 @@ class MLBTVIE(InfoExtractor): headers={ 'Accept': 'application/json', 'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk', - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }, data=data.encode())['access_token'] def _real_extract(self, url): @@ -326,15 +327,20 @@ class MLBTVIE(InfoExtractor): video_id)['data']['Airings'] formats, subtitles = [], {} - for airing in airings: - m3u8_url = self._download_json( + for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']): + format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing) + m3u8_url = traverse_obj(self._download_json( airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id, - headers={ + note=f'Downloading {format_id} stream info JSON', + errnote=f'Failed to download {format_id} stream info, skipping', + fatal=False, headers={ 'Authorization': self._access_token, - 'Accept': 'application/vnd.media-service+json; version=2' - })['stream']['complete'] + 'Accept': 'application/vnd.media-service+json; version=2', + }), ('stream', 'complete', {url_or_none})) + if not m3u8_url: + continue f, s = self._extract_m3u8_formats_and_subtitles( - m3u8_url, video_id, 'mp4', m3u8_id=join_nonempty(airing.get('feedType'), airing.get('feedLanguage'))) + m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) formats.extend(f) self._merge_subtitles(s, 
target=subtitles) diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py index 9383f13..a0b141a 100644 --- a/yt_dlp/extractor/mlssoccer.py +++ b/yt_dlp/extractor/mlssoccer.py @@ -3,7 +3,7 @@ from .common import InfoExtractor class MLSSoccerIE(InfoExtractor): _VALID_DOMAINS = r'(?:(?:cfmontreal|intermiamicf|lagalaxy|lafc|houstondynamofc|dcunited|atlutd|mlssoccer|fcdallas|columbuscrew|coloradorapids|fccincinnati|chicagofirefc|austinfc|nashvillesc|whitecapsfc|sportingkc|soundersfc|sjearthquakes|rsl|timbers|philadelphiaunion|orlandocitysc|newyorkredbulls|nycfc)\.com|(?:torontofc)\.ca|(?:revolutionsoccer)\.net)' - _VALID_URL = r'https?://(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS + _VALID_URL = rf'https?://(?:www\.)?{_VALID_DOMAINS}/video/#?(?P<id>[^/&$#?]+)' _TESTS = [{ 'url': 'https://www.mlssoccer.com/video/the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986#the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986', @@ -19,96 +19,97 @@ class MLSSoccerIE(InfoExtractor): 'tags': ['club/canada'], 'is_live': False, 'upload_date': '20211007', - 'filesize_approx': 255193528.83200002 + 'filesize_approx': 255193528.83200002, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.whitecapsfc.com/video/highlights-san-jose-earthquakes-vs-vancouver-whitecaps-fc-october-23-2021#highlights-san-jose-earthquakes-vs-vancouver-whitecaps-fc-october-23-2021', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.torontofc.ca/video/highlights-toronto-fc-vs-cf-montreal-october-23-2021-x6733#highlights-toronto-fc-vs-cf-montreal-october-23-2021-x6733', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.sportingkc.com/video/post-match-press-conference-john-pulskamp-oct-27-2021#post-match-press-conference-john-pulskamp-oct-27-2021', - 'only_matching': True + 'only_matching': True, }, { 'url': 
'https://www.soundersfc.com/video/highlights-seattle-sounders-fc-vs-sporting-kansas-city-october-23-2021', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.sjearthquakes.com/video/#highlights-austin-fc-vs-san-jose-earthquakes-june-19-2021', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.rsl.com/video/2021-u-of-u-health-mic-d-up-vs-colorado-10-16-21#2021-u-of-u-health-mic-d-up-vs-colorado-10-16-21', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.timbers.com/video/highlights-d-chara-asprilla-with-goals-in-portland-timbers-2-0-win-over-san-jose#highlights-d-chara-asprilla-with-goals-in-portland-timbers-2-0-win-over-san-jose', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.philadelphiaunion.com/video/highlights-torvphi', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.orlandocitysc.com/video/highlight-columbus-crew-vs-orlando-city-sc', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.newyorkredbulls.com/video/all-access-matchday-double-derby-week#all-access-matchday-double-derby-week', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.nycfc.com/video/highlights-nycfc-1-0-chicago-fire-fc#highlights-nycfc-1-0-chicago-fire-fc', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.revolutionsoccer.net/video/two-minute-highlights-revs-1-rapids-0-october-27-2021#two-minute-highlights-revs-1-rapids-0-october-27-2021', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.nashvillesc.com/video/goal-c-j-sapong-nashville-sc-92nd-minute', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.cfmontreal.com/video/faits-saillants-tor-v-mtl#faits-saillants-orl-v-mtl-x5645', - 'only_matching': True + 'only_matching': True, }, { 'url': 
'https://www.intermiamicf.com/video/all-access-victory-vs-nashville-sc-by-ukg#all-access-victory-vs-nashville-sc-by-ukg', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.lagalaxy.com/video/#moment-of-the-month-presented-by-san-manuel-casino-rayan-raveloson-scores-his-se', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.lafc.com/video/breaking-down-lafc-s-final-6-matches-of-the-2021-mls-regular-season#breaking-down-lafc-s-final-6-matches-of-the-2021-mls-regular-season', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.houstondynamofc.com/video/postgame-press-conference-michael-nelson-presented-by-coushatta-casino-res-x9660#postgame-press-conference-michael-nelson-presented-by-coushatta-casino-res-x9660', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.dcunited.com/video/tony-alfaro-my-family-pushed-me-to-believe-everything-was-possible', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.fcdallas.com/video/highlights-fc-dallas-vs-minnesota-united-fc-october-02-2021#highlights-fc-dallas-vs-minnesota-united-fc-october-02-2021', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.columbuscrew.com/video/match-rewind-columbus-crew-vs-new-york-red-bulls-october-23-2021', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.coloradorapids.com/video/postgame-reaction-robin-fraser-october-27#postgame-reaction-robin-fraser-october-27', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.fccincinnati.com/video/#keeping-cincy-chill-presented-by-coors-lite', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.chicagofirefc.com/video/all-access-fire-score-dramatic-road-win-in-cincy#all-access-fire-score-dramatic-road-win-in-cincy', - 'only_matching': True + 'only_matching': True, }, { 'url': 
'https://www.austinfc.com/video/highlights-colorado-rapids-vs-austin-fc-september-29-2021#highlights-colorado-rapids-vs-austin-fc-september-29-2021', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.atlutd.com/video/goal-josef-martinez-scores-in-the-73rd-minute#goal-josef-martinez-scores-in-the-73rd-minute', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) - data_json = self._parse_json(self._html_search_regex(r'data-options\=\"([^\"]+)\"', webpage, 'json'), id)['videoList'][0] + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + data_json = self._parse_json( + self._html_search_regex(r'data-options\=\"([^\"]+)\"', webpage, 'json'), video_id)['videoList'][0] return { - 'id': id, + 'id': video_id, '_type': 'url', - 'url': 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (data_json['accountId'], data_json['videoId']), + 'url': 'https://players.brightcove.net/{}/default_default/index.html?videoId={}'.format(data_json['accountId'], data_json['videoId']), 'ie_key': 'BrightcoveNew', } diff --git a/yt_dlp/extractor/mocha.py b/yt_dlp/extractor/mocha.py index 2fbc0e9..c3f4055 100644 --- a/yt_dlp/extractor/mocha.py +++ b/yt_dlp/extractor/mocha.py @@ -20,8 +20,8 @@ class MochaVideoIE(InfoExtractor): 'timestamp': 1652254203, 'upload_date': '20220511', 'comment_count': int, - 'categories': ['Kids'] - } + 'categories': ['Kids'], + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/mojvideo.py b/yt_dlp/extractor/mojvideo.py index d47ad07..6bc362a 100644 --- a/yt_dlp/extractor/mojvideo.py +++ b/yt_dlp/extractor/mojvideo.py @@ -17,7 +17,7 @@ class MojvideoIE(InfoExtractor): 'title': 'V avtu pred mano rdečelaska - Alfi Nipič', 'thumbnail': r're:^http://.*\.jpg$', 'duration': 242, - } + }, } def _real_extract(self, url): @@ -27,12 +27,12 @@ class MojvideoIE(InfoExtractor): # XML is 
malformed playerapi = self._download_webpage( - 'http://www.mojvideo.com/playerapi.php?v=%s&t=1' % video_id, display_id) + f'http://www.mojvideo.com/playerapi.php?v={video_id}&t=1', display_id) if '<error>true</error>' in playerapi: error_desc = self._html_search_regex( r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {error_desc}', expected=True) title = self._html_extract_title(playerapi) video_url = self._html_search_regex( diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py index 411d41c..930c13e 100644 --- a/yt_dlp/extractor/monstercat.py +++ b/yt_dlp/extractor/monstercat.py @@ -27,7 +27,7 @@ class MonstercatIE(InfoExtractor): 'release_date': '20230711', 'album': 'The Secret Language of Trees', 'album_artist': 'BT', - } + }, }] def _extract_tracks(self, table, album_meta): @@ -41,7 +41,7 @@ class MonstercatIE(InfoExtractor): track_number = int_or_none(try_call(lambda: get_element_by_class('py-xsmall', td))) if not track_id or not release_id: self.report_warning(f'Skipping track {track_number}, ID(s) not found') - self.write_debug(f'release_id={repr(release_id)} track_id={repr(track_id)}') + self.write_debug(f'release_id={release_id!r} track_id={track_id!r}') continue yield { **album_meta, @@ -51,7 +51,7 @@ class MonstercatIE(InfoExtractor): 'artist': clean_html(try_call(lambda: get_element_by_class('d-block fs-xxsmall', td))), 'url': f'https://www.monstercat.com/api/release/{release_id}/track-stream/{track_id}', 'id': track_id, - 'ext': 'mp3' + 'ext': 'mp3', } def _real_extract(self, url): diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py index b6c18fe..8655195 100644 --- a/yt_dlp/extractor/motherless.py +++ b/yt_dlp/extractor/motherless.py @@ -115,10 +115,10 @@ class MotherlessIE(InfoExtractor): if any(p in webpage for p in ( '<title>404 - 
MOTHERLESS.COM<', ">The page you're looking for cannot be found.<")): - raise ExtractorError('Video %s does not exist' % video_id, expected=True) + raise ExtractorError(f'Video {video_id} does not exist', expected=True) if '>The content you are trying to view is for friends only.' in webpage: - raise ExtractorError('Video %s is for friends only' % video_id, expected=True) + raise ExtractorError(f'Video {video_id} is for friends only', expected=True) title = self._html_search_regex( (r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>', @@ -127,7 +127,7 @@ class MotherlessIE(InfoExtractor): (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'), webpage, 'video URL', default=None, group='url') - or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id) + or f'http://cdn4.videos.motherlessmedia.com/videos/{video_id}.mp4?fs=opencloud') age_limit = self._rta_search(webpage) view_count = str_to_int(self._html_search_regex( (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'), diff --git a/yt_dlp/extractor/motorsport.py b/yt_dlp/extractor/motorsport.py index 167d85f..0178367 100644 --- a/yt_dlp/extractor/motorsport.py +++ b/yt_dlp/extractor/motorsport.py @@ -1,7 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_urlparse, -) class MotorsportIE(InfoExtractor): @@ -19,7 +18,7 @@ class MotorsportIE(InfoExtractor): 'uploader': 'mcomstaff', 'uploader_id': 'UC334JIYKkVnyFoNCclfZtHQ', 'upload_date': '20140903', - 'thumbnail': r're:^https?://.+\.jpg$' + 'thumbnail': r're:^https?://.+\.jpg$', }, 'add_ie': ['Youtube'], 'params': { @@ -40,7 +39,7 @@ class MotorsportIE(InfoExtractor): return self.url_result(iframe_path) iframe = self._download_webpage( - compat_urlparse.urljoin(url, iframe_path), display_id, + urllib.parse.urljoin(url, iframe_path), display_id, 'Downloading iframe') youtube_id = self._search_regex( 
r'www.youtube.com/embed/(.{11})', iframe, 'youtube id') @@ -48,5 +47,5 @@ class MotorsportIE(InfoExtractor): return { '_type': 'url_transparent', 'display_id': display_id, - 'url': 'https://youtube.com/watch?v=%s' % youtube_id, + 'url': f'https://youtube.com/watch?v={youtube_id}', } diff --git a/yt_dlp/extractor/moview.py b/yt_dlp/extractor/moview.py index 678b2eb..560154e 100644 --- a/yt_dlp/extractor/moview.py +++ b/yt_dlp/extractor/moview.py @@ -16,7 +16,7 @@ class MoviewPlayIE(JixieBaseIE): 'title': 'Candy Monster', 'description': 'Mengapa Candy Monster ingin mengambil permen Chloe?', 'thumbnail': 'https://video.jixie.media/1034/146182/146182_1280x720.jpg', - } + }, }, { # non-drm hls 'url': 'https://www.moview.id/play/75/Paris-Van-Java-Episode-16', @@ -29,8 +29,8 @@ class MoviewPlayIE(JixieBaseIE): 'thumbnail': 'https://video.jixie.media/1003/28210/28210_1280x720.jpg', 'description': 'md5:2a5e18d98eef9b39d7895029cac96c63', 'title': 'Paris Van Java Episode 16', - } - } + }, + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/moviezine.py b/yt_dlp/extractor/moviezine.py index cffcdcf..331a562 100644 --- a/yt_dlp/extractor/moviezine.py +++ b/yt_dlp/extractor/moviezine.py @@ -20,7 +20,7 @@ class MoviezineIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player') + jsplayer = self._download_webpage(f'http://www.moviezine.se/api/player.js?video={video_id}', video_id, 'Downloading js api player') formats = [{ 'format_id': 'sd', diff --git a/yt_dlp/extractor/movingimage.py b/yt_dlp/extractor/movingimage.py index 6e0ea26..7b1c797 100644 --- a/yt_dlp/extractor/movingimage.py +++ b/yt_dlp/extractor/movingimage.py @@ -31,7 +31,7 @@ class MovingImageIE(InfoExtractor): def search_field(field_name, fatal=False): return self._search_regex( - 
r'<span\s+class="field_title">%s:</span>\s*<span\s+class="field_content">([^<]+)</span>' % field_name, + rf'<span\s+class="field_title">{field_name}:</span>\s*<span\s+class="field_content">([^<]+)</span>', webpage, 'title', fatal=fatal) title = unescapeHTML(search_field('Title', fatal=True)).strip('()[]') diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py index 79728e1..dd86495 100644 --- a/yt_dlp/extractor/msn.py +++ b/yt_dlp/extractor/msn.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, determine_ext, @@ -139,7 +138,7 @@ class MSNIE(InfoExtractor): format_code = file_.get('formatCode') if not format_url or not format_code: continue - if compat_str(format_code) == '3100': + if str(format_code) == '3100': subtitles.setdefault(file_.get('culture', 'en'), []).append({ 'ext': determine_ext(format_url, 'ttml'), 'url': format_url, @@ -163,6 +162,6 @@ class MSNIE(InfoExtractor): error = unescapeHTML(self._search_regex( r'data-error=(["\'])(?P<error>.+?)\1', webpage, 'error', group='error')) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {error}', expected=True) return self.playlist_result(entries, page_id) diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index 404e431..34e015d 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -2,7 +2,6 @@ import re import xml.etree.ElementTree from .common import InfoExtractor -from ..compat import compat_str from ..networking import HEADRequest, Request from ..utils import ( ExtractorError, @@ -23,7 +22,7 @@ from ..utils import ( def _media_xml_tag(tag): - return '{http://search.yahoo.com/mrss/}%s' % tag + return f'{{http://search.yahoo.com/mrss/}}{tag}' class MTVServicesInfoExtractor(InfoExtractor): @@ -43,7 +42,7 @@ class MTVServicesInfoExtractor(InfoExtractor): return self._FEED_URL def _get_thumbnail_url(self, uri, itemdoc): - 
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) + search_path = '{}/{}'.format(_media_xml_tag('group'), _media_xml_tag('thumbnail')) thumb_node = itemdoc.find(search_path) if thumb_node is None: return None @@ -61,7 +60,7 @@ class MTVServicesInfoExtractor(InfoExtractor): response = self._request_webpage(req, mtvn_id, 'Resolving url') url = response.url # Transform the url to get the best quality: - url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1) + url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, count=1) return [{'url': url, 'ext': 'mp4'}] def _extract_video_formats(self, mdoc, mtvn_id, video_id): @@ -87,7 +86,7 @@ class MTVServicesInfoExtractor(InfoExtractor): rtmp_video_url = rendition.find('./src').text if 'error_not_available.swf' in rtmp_video_url: raise ExtractorError( - '%s said: video is not available' % self.IE_NAME, + f'{self.IE_NAME} said: video is not available', expected=True) if rtmp_video_url.endswith('siteunavail.png'): continue @@ -118,8 +117,8 @@ class MTVServicesInfoExtractor(InfoExtractor): if ext == 'cea-608': ext = 'scc' subtitles.setdefault(lang, []).append({ - 'url': compat_str(sub_src), - 'ext': ext + 'url': str(sub_src), + 'ext': ext, }) return subtitles @@ -127,7 +126,7 @@ class MTVServicesInfoExtractor(InfoExtractor): uri = itemdoc.find('guid').text video_id = self._id_from_uri(uri) self.report_extraction(video_id) - content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content'))) + content_el = itemdoc.find('{}/{}'.format(_media_xml_tag('group'), _media_xml_tag('content'))) mediagen_url = self._remove_template_parameter(content_el.attrib['url']) mediagen_url = mediagen_url.replace('device={device}', '') if 'acceptMethods' not in mediagen_url: @@ -143,9 +142,9 @@ class MTVServicesInfoExtractor(InfoExtractor): item = mediagen_doc.find('./video/item') if item is not None and 
item.get('type') == 'text': - message = '%s returned error: ' % self.IE_NAME + message = f'{self.IE_NAME} returned error: ' if item.get('code') is not None: - message += '%s - ' % item.get('code') + message += '{} - '.format(item.get('code')) message += item.text raise ExtractorError(message, expected=True) @@ -184,7 +183,7 @@ class MTVServicesInfoExtractor(InfoExtractor): episode = episode.text if episode is not None else None if season and episode: # episode number includes season, so remove it - episode = re.sub(r'^%s' % season, '', episode) + episode = re.sub(rf'^{season}', '', episode) # This a short id that's used in the webpage urls mtvn_id = None @@ -255,7 +254,7 @@ class MTVServicesInfoExtractor(InfoExtractor): feed_url = try_get( triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'], - compat_str) + str) if not feed_url: return @@ -263,7 +262,7 @@ class MTVServicesInfoExtractor(InfoExtractor): if not feed: return - return try_get(feed, lambda x: x['result']['data']['id'], compat_str) + return try_get(feed, lambda x: x['result']['data']['id'], str) @staticmethod def _extract_child_with_type(parent, t): @@ -320,8 +319,7 @@ class MTVServicesInfoExtractor(InfoExtractor): title = url_basename(url) webpage = self._download_webpage(url, title) mgid = self._extract_mgid(webpage) - videos_info = self._get_videos_info(mgid, url=url) - return videos_info + return self._get_videos_info(mgid, url=url) class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): @@ -346,7 +344,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): def _get_feed_url(self, uri, url=None): video_id = self._id_from_uri(uri) config = self._download_json( - 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id) + f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge', video_id) return self._remove_template_parameter(config['feedWithQueryParams']) def _real_extract(self, url): @@ -443,8 +441,8 @@ class 
MTVVideoIE(MTVServicesInfoExtractor): r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage) if m_vevo: vevo_id = m_vevo.group(1) - self.to_screen('Vevo video detected: %s' % vevo_id) - return self.url_result('vevo:%s' % vevo_id, ie='Vevo') + self.to_screen(f'Vevo video detected: {vevo_id}') + return self.url_result(f'vevo:{vevo_id}', ie='Vevo') uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri') return self._get_videos_info(uri) @@ -577,9 +575,9 @@ class MTVItaliaProgrammaIE(MTVItaliaIE): # XXX: Do not subclass from concrete I def _get_entries(self, title, url): while True: pg = self._search_regex(r'/(\d+)$', url, 'entries', '1') - entries = self._download_json(url, title, 'page %s' % pg) + entries = self._download_json(url, title, f'page {pg}') url = try_get( - entries, lambda x: x['result']['nextPageURL'], compat_str) + entries, lambda x: x['result']['nextPageURL'], str) entries = try_get( entries, ( lambda x: x['result']['data']['items'], @@ -598,15 +596,15 @@ class MTVItaliaProgrammaIE(MTVItaliaIE): # XXX: Do not subclass from concrete I info = self._download_json(info_url, video_id).get('manifest') redirect = try_get( - info, lambda x: x['newLocation']['url'], compat_str) + info, lambda x: x['newLocation']['url'], str) if redirect: return self.url_result(redirect) title = info.get('title') video_id = try_get( - info, lambda x: x['reporting']['itemId'], compat_str) + info, lambda x: x['reporting']['itemId'], str) parent_id = try_get( - info, lambda x: x['reporting']['parentId'], compat_str) + info, lambda x: x['reporting']['parentId'], str) playlist_url = current_url = None for z in (info.get('zones') or {}).values(): @@ -630,15 +628,15 @@ class MTVItaliaProgrammaIE(MTVItaliaIE): # XXX: Do not subclass from concrete I info, ( lambda x: x['title'], lambda x: x['headline']), - compat_str) - description = try_get(info, lambda x: x['content'], compat_str) + str) + description = try_get(info, lambda x: x['content'], str) if current_url: season = 
try_get( self._download_json(playlist_url, video_id, 'Seasons info'), lambda x: x['result']['data'], dict) current = try_get( - season, lambda x: x['currentSeason'], compat_str) + season, lambda x: x['currentSeason'], str) seasons = try_get( season, lambda x: x['seasons'], list) or [] diff --git a/yt_dlp/extractor/muenchentv.py b/yt_dlp/extractor/muenchentv.py index 934cd4f..5d2dd39 100644 --- a/yt_dlp/extractor/muenchentv.py +++ b/yt_dlp/extractor/muenchentv.py @@ -20,11 +20,11 @@ class MuenchenTVIE(InfoExtractor): 'ext': 'mp4', 'title': 're:^münchen.tv-Livestream [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'is_live': True, - 'thumbnail': r're:^https?://.*\.jpg$' + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { 'skip_download': True, - } + }, } def _real_extract(self, url): @@ -47,12 +47,12 @@ class MuenchenTVIE(InfoExtractor): ext = determine_ext(s['file'], None) label_str = s.get('label') if label_str is None: - label_str = '_%d' % format_num + label_str = f'_{format_num}' if ext is None: format_id = label_str else: - format_id = '%s-%s' % (ext, label_str) + format_id = f'{ext}-{label_str}' formats.append({ 'url': s['file'], diff --git a/yt_dlp/extractor/murrtube.py b/yt_dlp/extractor/murrtube.py index 74365c0..3b39a1b 100644 --- a/yt_dlp/extractor/murrtube.py +++ b/yt_dlp/extractor/murrtube.py @@ -36,7 +36,7 @@ class MurrtubeIE(InfoExtractor): 'view_count': int, 'like_count': int, 'tags': ['hump', 'breed', 'Fursuit', 'murrsuit', 'bareback'], - } + }, } def _download_gql(self, video_id, op, note=None, fatal=True): @@ -129,14 +129,14 @@ query Media($q: String, $sort: String, $userId: ID, $offset: Int!, $limit: Int!) 
__typename } }'''}, - 'Downloading page {0}'.format(page + 1)) + f'Downloading page {page + 1}') if data is None: raise ExtractorError(f'Failed to retrieve video list for page {page + 1}') media = data['media'] for entry in media: - yield self.url_result('murrtube:{0}'.format(entry['id']), MurrtubeIE.ie_key()) + yield self.url_result('murrtube:{}'.format(entry['id']), MurrtubeIE.ie_key()) def _real_extract(self, url): username = self._match_id(url) diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py index 289ae57..0ef2fa0 100644 --- a/yt_dlp/extractor/musescore.py +++ b/yt_dlp/extractor/musescore.py @@ -13,7 +13,7 @@ class MuseScoreIE(InfoExtractor): 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'PapyPiano', 'creator': 'Wolfgang Amadeus Mozart', - } + }, }, { 'url': 'https://musescore.com/user/36164500/scores/6837638', 'info_dict': { @@ -24,7 +24,7 @@ class MuseScoreIE(InfoExtractor): 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'roxbelviolin', 'creator': 'Guns N´Roses Arr. 
Roxbel Violin', - } + }, }, { 'url': 'https://musescore.com/classicman/fur-elise', 'info_dict': { @@ -35,7 +35,7 @@ class MuseScoreIE(InfoExtractor): 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'ClassicMan', 'creator': 'Ludwig van Beethoven (1770–1827)', - } + }, }, { 'url': 'https://musescore.com/minh_cuteee/scores/6555384', 'only_matching': True, @@ -44,8 +44,8 @@ class MuseScoreIE(InfoExtractor): def _real_extract(self, url): webpage = self._download_webpage(url, None) url = self._og_search_url(webpage) or url - id = self._match_id(url) - mp3_url = self._download_json(f'https://musescore.com/api/jmuse?id={id}&index=0&type=mp3&v2=1', id, + video_id = self._match_id(url) + mp3_url = self._download_json(f'https://musescore.com/api/jmuse?id={video_id}&index=0&type=mp3&v2=1', video_id, headers={'authorization': '63794e5461e4cfa046edfbdddfccc1ac16daffd2'})['info']['url'] formats = [{ 'url': mp3_url, @@ -54,7 +54,7 @@ class MuseScoreIE(InfoExtractor): }] return { - 'id': id, + 'id': video_id, 'formats': formats, 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), diff --git a/yt_dlp/extractor/musicdex.py b/yt_dlp/extractor/musicdex.py index a863514..5ca390e 100644 --- a/yt_dlp/extractor/musicdex.py +++ b/yt_dlp/extractor/musicdex.py @@ -8,9 +8,9 @@ from ..utils import ( class MusicdexBaseIE(InfoExtractor): - def _return_info(self, track_json, album_json, id): + def _return_info(self, track_json, album_json, video_id): return { - 'id': str(id), + 'id': str(video_id), 'title': track_json.get('name'), 'track': track_json.get('name'), 'description': track_json.get('description'), @@ -50,15 +50,16 @@ class MusicdexSongIE(MusicdexBaseIE): 'album_artists': ['fripSide'], 'thumbnail': 'https://www.musicdex.org/storage/album/9iDIam1DHTVqUG4UclFIEq1WAFGXfPW4y0TtZa91.png', 'album': 'To Aru Kagaku no Railgun T OP2 Single - dual existence', - 'release_year': 2020 + 'release_year': 2020, }, - 'params': 
{'skip_download': True} + 'params': {'skip_download': True}, }] def _real_extract(self, url): - id = self._match_id(url) - data_json = self._download_json(f'https://www.musicdex.org/secure/tracks/{id}?defaultRelations=true', id)['track'] - return self._return_info(data_json, data_json.get('album') or {}, id) + video_id = self._match_id(url) + data_json = self._download_json( + f'https://www.musicdex.org/secure/tracks/{video_id}?defaultRelations=true', video_id)['track'] + return self._return_info(data_json, data_json.get('album') or {}, video_id) class MusicdexAlbumIE(MusicdexBaseIE): @@ -79,13 +80,15 @@ class MusicdexAlbumIE(MusicdexBaseIE): }] def _real_extract(self, url): - id = self._match_id(url) - data_json = self._download_json(f'https://www.musicdex.org/secure/albums/{id}?defaultRelations=true', id)['album'] - entries = [self._return_info(track, data_json, track['id']) for track in data_json.get('tracks') or [] if track.get('id')] + playlist_id = self._match_id(url) + data_json = self._download_json( + f'https://www.musicdex.org/secure/albums/{playlist_id}?defaultRelations=true', playlist_id)['album'] + entries = [self._return_info(track, data_json, track['id']) + for track in data_json.get('tracks') or [] if track.get('id')] return { '_type': 'playlist', - 'id': id, + 'id': playlist_id, 'title': data_json.get('name'), 'description': data_json.get('description'), 'genres': [genre.get('name') for genre in data_json.get('genres') or []], @@ -98,12 +101,11 @@ class MusicdexAlbumIE(MusicdexBaseIE): class MusicdexPageIE(MusicdexBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor - def _entries(self, id): - next_page_url = self._API_URL % id + def _entries(self, playlist_id): + next_page_url = self._API_URL % playlist_id while next_page_url: - data_json = self._download_json(next_page_url, id)['pagination'] - for data in data_json.get('data') or []: - yield data + data_json = self._download_json(next_page_url, 
playlist_id)['pagination'] + yield from data_json.get('data') or [] next_page_url = data_json.get('next_page_url') @@ -123,15 +125,15 @@ class MusicdexArtistIE(MusicdexPageIE): }] def _real_extract(self, url): - id = self._match_id(url) - data_json = self._download_json(f'https://www.musicdex.org/secure/artists/{id}', id)['artist'] + playlist_id = self._match_id(url) + data_json = self._download_json(f'https://www.musicdex.org/secure/artists/{playlist_id}', playlist_id)['artist'] entries = [] - for album in self._entries(id): + for album in self._entries(playlist_id): entries.extend(self._return_info(track, album, track['id']) for track in album.get('tracks') or [] if track.get('id')) return { '_type': 'playlist', - 'id': id, + 'id': playlist_id, 'title': data_json.get('name'), 'view_count': data_json.get('plays'), 'thumbnail': format_field(data_json, 'image_small', 'https://www.musicdex.org/%s'), @@ -156,14 +158,14 @@ class MusicdexPlaylistIE(MusicdexPageIE): }] def _real_extract(self, url): - id = self._match_id(url) - data_json = self._download_json(f'https://www.musicdex.org/secure/playlists/{id}', id)['playlist'] + playlist_id = self._match_id(url) + data_json = self._download_json(f'https://www.musicdex.org/secure/playlists/{playlist_id}', playlist_id)['playlist'] entries = [self._return_info(track, track.get('album') or {}, track['id']) - for track in self._entries(id) or [] if track.get('id')] + for track in self._entries(playlist_id) or [] if track.get('id')] return { '_type': 'playlist', - 'id': id, + 'id': playlist_id, 'title': data_json.get('name'), 'description': data_json.get('description'), 'view_count': data_json.get('plays'), diff --git a/yt_dlp/extractor/mx3.py b/yt_dlp/extractor/mx3.py index cb9f50e..5c42f4d 100644 --- a/yt_dlp/extractor/mx3.py +++ b/yt_dlp/extractor/mx3.py @@ -94,7 +94,7 @@ class Mx3IE(Mx3BaseIE): 'title': "S'envoler", 'release_year': 2021, 'tags': [], - } + }, }, { 'url': 'https://mx3.ch/t/1LIY', 'md5': 
'48293cb908342547827f963a5a2e9118', @@ -110,7 +110,7 @@ class Mx3IE(Mx3BaseIE): 'release_year': 2023, 'tags': ['the broots', 'cassata records', 'larytta'], 'description': '"Begging for Help" Larytta Remix Official Video\nRealized By Kali Donkilie in 2023', - } + }, }, { 'url': 'https://mx3.ch/t/1C6E', 'md5': '1afcd578493ddb8e5008e94bb6d97e25', @@ -125,7 +125,7 @@ class Mx3IE(Mx3BaseIE): 'title': 'Wide Awake', 'release_year': 2021, 'tags': ['alien bubblegum', 'bubblegum', 'alien', 'pop punk', 'poppunk'], - } + }, }] @@ -146,7 +146,7 @@ class Mx3NeoIE(Mx3BaseIE): 'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252', 'release_year': 2023, 'tags': [], - } + }, }] @@ -167,5 +167,5 @@ class Mx3VolksmusikIE(Mx3BaseIE): 'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120', 'release_year': 2012, 'tags': [], - } + }, }] diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py index 1fdb08e..8d3e35a 100644 --- a/yt_dlp/extractor/mxplayer.py +++ b/yt_dlp/extractor/mxplayer.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, traverse_obj, @@ -23,7 +22,7 @@ class MxplayerIE(InfoExtractor): 'duration': 2451, 'season': 'Season 1', 'series': 'My Girlfriend Is An Alien (Hindi Dubbed)', - 'episode': 'Episode 1' + 'episode': 'Episode 1', }, 'params': { 'format': 'bv', @@ -56,13 +55,13 @@ class MxplayerIE(InfoExtractor): 'duration': 2332, 'season': 'Season 1', 'series': 'Shaitaan', - 'episode': 'Episode 1' + 'episode': 'Episode 1', }, 'params': { 'format': 'best', 'skip_download': True, }, - 'skip': 'No longer available.' 
+ 'skip': 'No longer available.', }, { 'url': 'https://www.mxplayer.in/show/watch-aashram/chapter-1/duh-swapna-online-d445579792b0135598ba1bc9088a84cb', 'info_dict': { @@ -76,7 +75,7 @@ class MxplayerIE(InfoExtractor): 'duration': 2568, 'season': 'Season 1', 'series': 'Aashram', - 'episode': 'Episode 3' + 'episode': 'Episode 3', }, 'params': { 'format': 'bv', @@ -95,7 +94,7 @@ class MxplayerIE(InfoExtractor): 'duration': 1305, 'season': 'Season 1', 'series': 'Dangerous', - 'episode': 'Episode 1' + 'episode': 'Episode 1', }, 'params': { 'format': 'bv', @@ -114,7 +113,7 @@ class MxplayerIE(InfoExtractor): 'format': 'best', 'skip_download': True, }, - 'skip': 'No longer available. Cannot be played on browser' + 'skip': 'No longer available. Cannot be played on browser', }, { 'url': 'https://www.mxplayer.in/movie/watch-kitne-door-kitne-paas-movie-online-a9e9c76c566205955f70d8b2cb88a6a2', 'info_dict': { @@ -206,11 +205,11 @@ class MxplayerShowIE(InfoExtractor): 'info_dict': { 'id': 'a8f44e3cc0814b5601d17772cedf5417', 'title': 'Watch Chakravartin Ashoka Samrat Series Online', - } + }, }] - _API_SHOW_URL = "https://api.mxplay.com/v1/web/detail/tab/tvshowseasons?type=tv_show&id={}&device-density=2&platform=com.mxplay.desktop&content-languages=hi,en" - _API_EPISODES_URL = "https://api.mxplay.com/v1/web/detail/tab/tvshowepisodes?type=season&id={}&device-density=1&platform=com.mxplay.desktop&content-languages=hi,en&{}" + _API_SHOW_URL = 'https://api.mxplay.com/v1/web/detail/tab/tvshowseasons?type=tv_show&id={}&device-density=2&platform=com.mxplay.desktop&content-languages=hi,en' + _API_EPISODES_URL = 'https://api.mxplay.com/v1/web/detail/tab/tvshowepisodes?type=season&id={}&device-density=1&platform=com.mxplay.desktop&content-languages=hi,en&{}' def _entries(self, show_id): show_json = self._download_json( @@ -218,7 +217,7 @@ class MxplayerShowIE(InfoExtractor): video_id=show_id, headers={'Referer': 'https://mxplayer.in'}) page_num = 0 for season in show_json.get('items') or 
[]: - season_id = try_get(season, lambda x: x['id'], compat_str) + season_id = try_get(season, lambda x: x['id'], str) next_url = '' while next_url is not None: page_num += 1 @@ -226,11 +225,11 @@ class MxplayerShowIE(InfoExtractor): self._API_EPISODES_URL.format(season_id, next_url), video_id=season_id, headers={'Referer': 'https://mxplayer.in'}, - note='Downloading JSON metadata page %d' % page_num) + note=f'Downloading JSON metadata page {page_num}') for episode in season_json.get('items') or []: video_url = episode['webUrl'] yield self.url_result( - 'https://mxplayer.in%s' % video_url, + f'https://mxplayer.in{video_url}', ie=MxplayerIE.ie_key(), video_id=video_url.split('-')[-1]) next_url = season_json.get('next') diff --git a/yt_dlp/extractor/myspace.py b/yt_dlp/extractor/myspace.py index 3451098..fa2ef14 100644 --- a/yt_dlp/extractor/myspace.py +++ b/yt_dlp/extractor/myspace.py @@ -95,17 +95,17 @@ class MySpaceIE(InfoExtractor): if is_song: # songs don't store any useful info in the 'context' variable song_data = self._search_regex( - r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id, + rf'''<button.*data-song-id=(["\']){video_id}\1.*''', webpage, 'song_data', default=None, group=0) if song_data is None: # some songs in an album are not playable self.report_warning( - '%s: No downloadable song on this page' % video_id) + f'{video_id}: No downloadable song on this page') return def search_data(name): return self._search_regex( - r'''data-%s=([\'"])(?P<data>.*?)\1''' % name, + rf'''data-{name}=([\'"])(?P<data>.*?)\1''', song_data, name, default='', group='data') formats = formats_from_stream_urls( search_data('stream-url'), search_data('hls-stream-url'), @@ -114,10 +114,10 @@ class MySpaceIE(InfoExtractor): vevo_id = search_data('vevo-id') youtube_id = search_data('youtube-id') if vevo_id: - self.to_screen('Vevo video detected: %s' % vevo_id) - return self.url_result('vevo:%s' % vevo_id, ie='Vevo') + self.to_screen(f'Vevo video detected: {vevo_id}') + 
return self.url_result(f'vevo:{vevo_id}', ie='Vevo') elif youtube_id: - self.to_screen('Youtube video detected: %s' % youtube_id) + self.to_screen(f'Youtube video detected: {youtube_id}') return self.url_result(youtube_id, ie='Youtube') else: raise ExtractorError( @@ -181,7 +181,7 @@ class MySpaceAlbumIE(InfoExtractor): tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage) if not tracks_paths: raise ExtractorError( - '%s: No songs found, try using proxy' % display_id, + f'{display_id}: No songs found, try using proxy', expected=True) entries = [ self.url_result(t_path, ie=MySpaceIE.ie_key()) diff --git a/yt_dlp/extractor/myspass.py b/yt_dlp/extractor/myspass.py index 28ac982..3e8d506 100644 --- a/yt_dlp/extractor/myspass.py +++ b/yt_dlp/extractor/myspass.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, parse_duration, @@ -75,7 +74,7 @@ class MySpassIE(InfoExtractor): for group in self._search_regex(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url, 'myspass', group=(1, 2, 3), default=[]): group_int = int(group) if group_int > video_id_int: - video_url = video_url.replace(group, compat_str(group_int // video_id_int)) + video_url = video_url.replace(group, str(group_int // video_id_int)) return { 'id': video_id, diff --git a/yt_dlp/extractor/mzaalo.py b/yt_dlp/extractor/mzaalo.py index 1996368..52e5ea8 100644 --- a/yt_dlp/extractor/mzaalo.py +++ b/yt_dlp/extractor/mzaalo.py @@ -24,7 +24,7 @@ class MzaaloIE(InfoExtractor): 'categories': ['Drama'], 'age_limit': 13, }, - 'params': {'skip_download': 'm3u8'} + 'params': {'skip_download': 'm3u8'}, }, { # Shows 'url': 'https://www.mzaalo.com/play/original/93d42b2b-f373-4c2d-bca4-997412cb069d/Modi-Season-2-CM-TO-PM/Episode-1:Decision,-Not-Promises', @@ -40,7 +40,7 @@ class MzaaloIE(InfoExtractor): 'categories': ['Drama'], 'age_limit': 13, }, - 'params': {'skip_download': 'm3u8'} + 'params': {'skip_download': 'm3u8'}, }, { # Streams/Clips 
'url': 'https://www.mzaalo.com/play/clip/83cdbcb5-400a-42f1-a1d2-459053cfbda5/Manto-Ki-Kahaaniya', @@ -54,7 +54,7 @@ class MzaaloIE(InfoExtractor): 'duration': 1937.0, 'language': 'hin', }, - 'params': {'skip_download': 'm3u8'} + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://mzaalo.com/watch/MOVIE/389c892d-0b65-4019-bf73-d4edcb1c014f/Chalo-Dilli', 'only_matching': True, diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py index 8a8a5fe..bbb327e 100644 --- a/yt_dlp/extractor/n1.py +++ b/yt_dlp/extractor/n1.py @@ -16,7 +16,7 @@ class N1InfoAssetIE(InfoExtractor): 'id': 'ljsottomazilirija3060921-n1info-si-worldwide', 'ext': 'mp4', 'title': 'ljsottomazilirija3060921-n1info-si-worldwide', - } + }, }] def _real_extract(self, url): @@ -46,7 +46,7 @@ class N1InfoIIE(InfoExtractor): 'description': 'md5:467f330af1effedd2e290f10dc31bb8e', 'uploader': 'Sport Klub', 'uploader_id': 'sportklub', - } + }, }, { 'url': 'https://rs.n1info.com/vesti/djilas-los-plan-za-metro-nece-resiti-nijedan-saobracajni-problem/', 'info_dict': { @@ -102,7 +102,7 @@ class N1InfoIIE(InfoExtractor): 'title': 'Ćuta: Biti u Kosovskoj Mitrovici znači da te dočekaju eksplozivnim napravama', 'upload_date': '20230620', 'timestamp': 1687290536, - 'thumbnail': 'https://cdn.brid.tv/live/partners/26827/snapshot/1332368_th_6492013a8356f_1687290170.jpg' + 'thumbnail': 'https://cdn.brid.tv/live/partners/26827/snapshot/1332368_th_6492013a8356f_1687290170.jpg', }, }, { 'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/', diff --git a/yt_dlp/extractor/nate.py b/yt_dlp/extractor/nate.py index 5e74caa..bbc641f 100644 --- a/yt_dlp/extractor/nate.py +++ b/yt_dlp/extractor/nate.py @@ -29,7 +29,7 @@ class NateIE(InfoExtractor): 'uploader_id': '3606', 'tags': 'count:59', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://tv.nate.com/clip/4300566', 'info_dict': { @@ -47,7 +47,7 @@ class 
NateIE(InfoExtractor): 'uploader_id': '27987', 'tags': 'count:20', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] _QUALITY = { @@ -60,8 +60,8 @@ class NateIE(InfoExtractor): } def _real_extract(self, url): - id = self._match_id(url) - video_data = self._download_json(f'https://tv.nate.com/api/v1/clip/{id}', id) + video_id = self._match_id(url) + video_data = self._download_json(f'https://tv.nate.com/api/v1/clip/{video_id}', video_id) formats = [{ 'format_id': f_url[-2:], 'url': f_url, @@ -69,7 +69,7 @@ class NateIE(InfoExtractor): 'quality': int_or_none(f_url[-2:]), } for f_url in video_data.get('smcUriList') or []] return { - 'id': id, + 'id': video_id, 'title': video_data.get('clipTitle'), 'description': video_data.get('synopsis'), 'thumbnail': video_data.get('contentImg'), @@ -102,19 +102,19 @@ class NateProgramIE(InfoExtractor): }, }] - def _entries(self, id): + def _entries(self, playlist_id): for page_num in itertools.count(1): - program_data = self._download_json(f'https://tv.nate.com/api/v1/program/{id}/clip/ranking?size=20&page={page_num}', - id, note=f'Downloading page {page_num}') + program_data = self._download_json( + f'https://tv.nate.com/api/v1/program/{playlist_id}/clip/ranking?size=20&page={page_num}', + playlist_id, note=f'Downloading page {page_num}') for clip in program_data.get('content') or []: clip_id = clip.get('clipSeq') if clip_id: yield self.url_result( - 'https://tv.nate.com/clip/%s' % clip_id, - ie=NateIE.ie_key(), video_id=clip_id) + f'https://tv.nate.com/clip/{clip_id}', NateIE, playlist_id) if program_data.get('last'): break def _real_extract(self, url): - id = self._match_id(url) - return self.playlist_result(self._entries(id), playlist_id=id) + playlist_id = self._match_id(url) + return self.playlist_result(self._entries(playlist_id), playlist_id=playlist_id) diff --git a/yt_dlp/extractor/nationalgeographic.py b/yt_dlp/extractor/nationalgeographic.py index 6f046bc..43f84a9 100644 --- 
a/yt_dlp/extractor/nationalgeographic.py +++ b/yt_dlp/extractor/nationalgeographic.py @@ -55,7 +55,7 @@ class NationalGeographicVideoIE(InfoExtractor): '_type': 'url_transparent', 'ie_key': 'ThePlatform', 'url': smuggle_url( - 'http://link.theplatform.com/s/ngs/media/guid/2423130747/%s?mbr=true' % guid, + f'http://link.theplatform.com/s/ngs/media/guid/2423130747/{guid}?mbr=true', {'force_smil_url': True}), 'id': guid, } diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index 26400e3..a9f7f46 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -36,7 +36,7 @@ class NaverBaseIE(InfoExtractor): type_ = 'automatic_captions' if caption.get('type') == 'auto' else 'subtitles' lang = caption.get('locale') or join_nonempty('language', 'country', from_dict=caption) or 'und' if caption.get('type') == 'fan': - lang += '_fan%d' % next(i for i in itertools.count(1) if f'{lang}_fan{i}' not in ret[type_]) + lang += '_fan{}'.format(next(i for i in itertools.count(1) if f'{lang}_fan{i}' not in ret[type_])) ret[type_].setdefault(lang, []).extend({ 'url': sub_url, 'name': join_nonempty('label', 'fanName', from_dict=caption, delim=' - '), @@ -63,7 +63,7 @@ class NaverBaseIE(InfoExtractor): encoding_option = stream.get('encodingOption', {}) bitrate = stream.get('bitrate', {}) formats.append({ - 'format_id': '%s_%s' % (stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))), + 'format_id': '{}_{}'.format(stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))), 'url': stream_url, 'ext': 'mp4', 'width': int_or_none(encoding_option.get('width')), @@ -261,7 +261,7 @@ class NaverLiveIE(NaverBaseIE): 'thumbnail': ('thumbnailImageUrl', {url_or_none}), 'start_time': (('startTime', 'startDateTime', 'startYmdt'), {parse_iso8601}), }), get_all=False), - 'is_live': True + 'is_live': True, } @@ -286,7 +286,7 @@ class NaverNowIE(NaverBaseIE): }, 'params': { 'noplaylist': True, - } + }, }, { 'url': 
'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=', 'md5': '9f6118e398aa0f22b2152f554ea7851b', @@ -311,7 +311,7 @@ class NaverNowIE(NaverBaseIE): 'id': '4759', 'title': '아이키의 떰즈업', }, - 'playlist_mincount': 101 + 'playlist_mincount': 101, }, { 'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay', 'info_dict': { @@ -348,7 +348,7 @@ class NaverNowIE(NaverBaseIE): show_vod_info = self._download_json( f'{self._API_URL}/vod-shows/now.{show_id}', show_id, query={'page': page, 'page_size': page_size}, - note=f'Downloading JSON vod list for show {show_id} - page {page}' + note=f'Downloading JSON vod list for show {show_id} - page {page}', )['response']['result'] for v in show_vod_info.get('vod_list') or []: yield self._extract_replay(show_id, v['id']) diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py index ec4d636..91ae1d1 100644 --- a/yt_dlp/extractor/nba.py +++ b/yt_dlp/extractor/nba.py @@ -1,11 +1,8 @@ import functools import re +import urllib.parse from .turner import TurnerBaseIE -from ..compat import ( - compat_str, - compat_urllib_parse_unquote, -) from ..utils import ( OnDemandPagedList, int_or_none, @@ -22,7 +19,7 @@ from ..utils import ( class NBACVPBaseIE(TurnerBaseIE): def _extract_nba_cvp_info(self, path, video_id, fatal=False): return self._extract_cvp_info( - 'http://secure.nba.com/%s' % path, video_id, { + f'http://secure.nba.com/{path}', video_id, { 'default': { 'media_src': 'http://nba.cdn.turner.com/nba/big', }, @@ -185,7 +182,7 @@ class NBAWatchCollectionIE(NBAWatchBaseIE): page += 1 videos = self._download_json( 'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id, - collection_id, 'Downloading page %d JSON metadata' % page, query={ + collection_id, f'Downloading page {page} JSON metadata', query={ 'count': self._PAGE_SIZE, 'page': page, })['results']['videos'] @@ -260,14 +257,14 @@ class NBABaseIE(NBACVPBaseIE): def _call_api(self, team, content_id, query, 
resource): return self._download_json( - 'https://api.nba.net/2/%s/video,imported_video,wsc/' % team, - content_id, 'Download %s JSON metadata' % resource, + f'https://api.nba.net/2/{team}/video,imported_video,wsc/', + content_id, f'Download {resource} JSON metadata', query=query, headers={ 'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b', })['response']['result'] def _extract_video(self, video, team, extract_all=True): - video_id = compat_str(video['nid']) + video_id = str(video['nid']) team = video['brand'] info = { @@ -330,7 +327,7 @@ class NBABaseIE(NBACVPBaseIE): def _real_extract(self, url): team, display_id = self._match_valid_url(url).groups() if '/play#/' in url: - display_id = compat_urllib_parse_unquote(display_id) + display_id = urllib.parse.unquote(display_id) else: webpage = self._download_webpage(url, display_id) display_id = self._search_regex( @@ -362,7 +359,7 @@ class NBAEmbedIE(NBABaseIE): class NBAIE(NBABaseIE): IE_NAME = 'nba' - _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX + _VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?!{NBABaseIE._CHANNEL_PATH_REGEX})video/(?P<id>(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ 'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774', 'info_dict': { @@ -389,7 +386,7 @@ class NBAIE(NBABaseIE): class NBAChannelIE(NBABaseIE): IE_NAME = 'nba:channel' - _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX + _VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?:{NBABaseIE._CHANNEL_PATH_REGEX})/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.nba.com/blazers/video/channel/summer_league', 'info_dict': { @@ -408,7 +405,7 @@ class NBAChannelIE(NBABaseIE): 'channels': channel, 'count': self._PAGE_SIZE, 'offset': page * self._PAGE_SIZE, - }, 'page %d' % (page + 1)) + }, f'page {page + 1}') for video in results: yield self._extract_video(video, team, 
False) diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index e88f98a..8f6fb22 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -1,12 +1,12 @@ import base64 import json import re +import urllib.parse import xml.etree.ElementTree from .adobepass import AdobePassIE from .common import InfoExtractor from .theplatform import ThePlatformIE, default_ns -from ..compat import compat_urllib_parse_unquote from ..networking import HEADRequest from ..utils import ( ExtractorError, @@ -16,6 +16,7 @@ from ..utils import ( determine_ext, float_or_none, int_or_none, + join_nonempty, mimetype2ext, parse_age_limit, parse_duration, @@ -148,12 +149,12 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE # Percent escaped url 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189', 'only_matching': True, - } + }, ] def _real_extract(self, url): permalink, video_id = self._match_valid_url(url).groups() - permalink = 'http' + compat_urllib_parse_unquote(permalink) + permalink = 'http' + urllib.parse.unquote(permalink) video_data = self._download_json( 'https://friendship.nbc.co/v2/graphql', video_id, query={ 'query': '''query bonanzaPage( @@ -201,7 +202,7 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE 'switch': 'HLSServiceSecure', } video_id = video_data['mpxGuid'] - tp_path = 'NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id) + tp_path = 'NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id) tpm = self._download_theplatform_metadata(tp_path, video_id) title = tpm.get('title') or video_data.get('secondaryTitle') if video_data.get('locked'): @@ -211,7 +212,7 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE query['auth'] = self._extract_mvpd_auth( url, video_id, 'nbcentertainment', resource) theplatform_url = smuggle_url(update_url_query( - 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' 
% (video_data.get('mpxAccountId') or '2410887629', video_id), + 'http://link.theplatform.com/s/NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id), query), {'force_smil_url': True}) # Empty string or 0 can be valid values for these. So the check must be `is None` @@ -253,7 +254,7 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE class NBCSportsVPlayerIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/' _VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)' - _EMBED_REGEX = [r'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>%s[^\"]+)' % _VALID_URL_BASE] + _EMBED_REGEX = [rf'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>{_VALID_URL_BASE}[^\"]+)'] _TESTS = [{ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI', @@ -267,8 +268,8 @@ class NBCSportsVPlayerIE(InfoExtractor): 'uploader': 'NBCU-SPORTS', 'duration': 72.818, 'chapters': [], - 'thumbnail': r're:^https?://.*\.jpg$' - } + 'thumbnail': r're:^https?://.*\.jpg$', + }, }, { 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/PEgOtlNcC_y2', 'only_matching': True, @@ -301,7 +302,7 @@ class NBCSportsIE(InfoExtractor): 'chapters': [], 'thumbnail': 'https://hdliveextra-a.akamaihd.net/HD/image_sports/NBCU_Sports_Group_-_nbcsports/253/303/izzodps.jpg', 'duration': 528.395, - } + }, }, { # data-mpx-src 'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot', @@ -339,7 +340,7 @@ class NBCSportsStreamIE(AdobePassIE): def _real_extract(self, url): video_id = self._match_id(url) live_source = self._download_json( - 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id, + f'http://stream.nbcsports.com/data/live_sources_{video_id}.json', video_id) video_source = live_source['videoSources'][0] title = video_source['title'] @@ -498,10 +499,8 @@ 
class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE m3u8_id=format_id, fatal=False)) continue tbr = int_or_none(va.get('bitrate'), 1000) - if tbr: - format_id += '-%d' % tbr formats.append({ - 'format_id': format_id, + 'format_id': join_nonempty(format_id, tbr), 'url': public_url, 'width': int_or_none(va.get('width')), 'height': int_or_none(va.get('height')), @@ -568,7 +567,7 @@ class NBCOlympicsIE(InfoExtractor): except RegexNotFoundError: theplatform_url = self._search_regex( r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2", - webpage, 'embedding URL', group="embedUrl") + webpage, 'embedding URL', group='embedUrl') return { '_type': 'url_transparent', @@ -623,7 +622,7 @@ class NBCOlympicsStreamIE(AdobePassIE): source_url = self._download_json( f'https://api-leap.nbcsports.com/feeds/assets/{pid}?application=NBCOlympics&platform=desktop&format=nbc-player&env=staging', - pid, 'Downloading leap config' + pid, 'Downloading leap config', )['videoSources'][0]['cdnSources']['primary'][0]['sourceUrl'] if event_config.get('cdnToken'): diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py index 243221d..5181c7f 100644 --- a/yt_dlp/extractor/ndr.py +++ b/yt_dlp/extractor/ndr.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse from ..utils import ( ExtractorError, determine_ext, @@ -125,13 +125,13 @@ class NDRIE(NDRBaseIE): # some more work needed if we only found sophoraID if re.match(r'^[a-z]+\d+$', embed_url): # get the initial part of the url path,. 
eg /panorama/archiv/2022/ - parsed_url = compat_urllib_parse_urlparse(url) - path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='') + parsed_url = urllib.parse.urlparse(url) + path = self._search_regex(rf'(.+/){display_id}', parsed_url.path or '', 'embed URL', default='') # find tell-tale image with the actual ID - ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None) + ndr_id = self._search_regex(rf'{path}([a-z]+\d+)(?!\.)\b', webpage, 'embed URL', default=None) # or try to use special knowledge! NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html' - embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, ) + embed_url = f'ndr:{ndr_id}' if ndr_id else NDR_INFO_URL_TPL % (embed_url, ) if not embed_url: raise ExtractorError('Unable to extract embedUrl') @@ -141,7 +141,7 @@ class NDRIE(NDRBaseIE): timestamp = parse_iso8601( self._search_regex( (r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"', - r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ), + r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)'), webpage, 'upload date', group='cont', default=None)) info = self._search_json_ld(webpage, display_id, default={}) return merge_dicts({ @@ -200,7 +200,7 @@ class NJoyIE(NDRBaseIE): # find tell-tale URL with the actual ID, or ... 
video_id = self._search_regex( (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''', - r'<iframe[^>]+id="pp_([\da-z]+)"', ), + r'<iframe[^>]+id="pp_([\da-z]+)"'), webpage, 'NDR id', default=None) description = ( @@ -211,7 +211,7 @@ class NJoyIE(NDRBaseIE): return { '_type': 'url_transparent', 'ie_key': 'NDREmbedBase', - 'url': 'ndr:%s' % video_id, + 'url': f'ndr:{video_id}', 'display_id': display_id, 'description': description, 'title': display_id.replace('-', ' ').strip(), @@ -234,7 +234,7 @@ class NDREmbedBaseIE(InfoExtractor): # XXX: Conventionally, Concrete class name video_id = mobj.group('id') or mobj.group('id_s') ppjson = self._download_json( - 'http://www.ndr.de/%s-ppjson.json' % video_id, video_id) + f'http://www.ndr.de/{video_id}-ppjson.json', video_id) playlist = ppjson['playlist'] diff --git a/yt_dlp/extractor/ndtv.py b/yt_dlp/extractor/ndtv.py index d099db3..c328bd4 100644 --- a/yt_dlp/extractor/ndtv.py +++ b/yt_dlp/extractor/ndtv.py @@ -15,12 +15,12 @@ class NDTVIE(InfoExtractor): 'info_dict': { 'id': '468818', 'ext': 'mp4', - 'title': "प्राइम टाइम: सिस्टम बीमार, स्कूल बदहाल", + 'title': 'प्राइम टाइम: सिस्टम बीमार, स्कूल बदहाल', 'description': 'md5:f410512f1b49672e5695dea16ef2731d', 'upload_date': '20170928', 'duration': 2218, 'thumbnail': r're:https?://.*\.jpg', - } + }, }, { # __filename is url @@ -29,45 +29,45 @@ class NDTVIE(InfoExtractor): 'info_dict': { 'id': '470304', 'ext': 'mp4', - 'title': "Cracker-Free Diwali Wishes From Karan Johar, Kriti Sanon & Other Stars", + 'title': 'Cracker-Free Diwali Wishes From Karan Johar, Kriti Sanon & Other Stars', 'description': 'md5:f115bba1adf2f6433fa7c1ade5feb465', 'upload_date': '20171019', 'duration': 137, 'thumbnail': r're:https?://.*\.jpg', - } + }, }, { 'url': 'https://www.ndtv.com/video/news/news/delhi-s-air-quality-status-report-after-diwali-is-very-poor-470372', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://auto.ndtv.com/videos/the-cnb-daily-october-13-2017-469935', - 
'only_matching': True + 'only_matching': True, }, { 'url': 'https://sports.ndtv.com/cricket/videos/2nd-t20i-rock-thrown-at-australia-cricket-team-bus-after-win-over-india-469764', - 'only_matching': True + 'only_matching': True, }, { 'url': 'http://gadgets.ndtv.com/videos/uncharted-the-lost-legacy-review-465568', - 'only_matching': True + 'only_matching': True, }, { 'url': 'http://profit.ndtv.com/videos/news/video-indian-economy-on-very-solid-track-international-monetary-fund-chief-470040', - 'only_matching': True + 'only_matching': True, }, { 'url': 'http://food.ndtv.com/video-basil-seeds-coconut-porridge-419083', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://doctor.ndtv.com/videos/top-health-stories-of-the-week-467396', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://swirlster.ndtv.com/video/how-to-make-friends-at-work-469324', - 'only_matching': True - } + 'only_matching': True, + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/nekohacker.py b/yt_dlp/extractor/nekohacker.py index 24b6657..537158e 100644 --- a/yt_dlp/extractor/nekohacker.py +++ b/yt_dlp/extractor/nekohacker.py @@ -38,8 +38,8 @@ class NekoHackerIE(InfoExtractor): 'artist': 'Neko Hacker', 'track': 'Spaceship', 'track_number': 1, - 'duration': 195.0 - } + 'duration': 195.0, + }, }, { 'url': 'https://nekohacker.com/wp-content/uploads/2022/11/02-City-Runner.mp3', @@ -56,8 +56,8 @@ class NekoHackerIE(InfoExtractor): 'artist': 'Neko Hacker', 'track': 'City Runner', 'track_number': 2, - 'duration': 148.0 - } + 'duration': 148.0, + }, }, { 'url': 'https://nekohacker.com/wp-content/uploads/2022/11/03-Nature-Talk.mp3', @@ -74,8 +74,8 @@ class NekoHackerIE(InfoExtractor): 'artist': 'Neko Hacker', 'track': 'Nature Talk', 'track_number': 3, - 'duration': 174.0 - } + 'duration': 174.0, + }, }, { 'url': 'https://nekohacker.com/wp-content/uploads/2022/11/04-Crystal-World.mp3', @@ -92,10 +92,10 @@ class NekoHackerIE(InfoExtractor): 'artist': 'Neko 
Hacker', 'track': 'Crystal World', 'track_number': 4, - 'duration': 199.0 - } - } - ] + 'duration': 199.0, + }, + }, + ], }, { 'url': 'https://nekohacker.com/susume/', 'info_dict': { @@ -118,7 +118,7 @@ class NekoHackerIE(InfoExtractor): 'artist': 'Neko Hacker', 'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0', 'track_number': 1, - } + }, }, { 'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-feat.-六科なじむ-CV_-日高里菜-.mp3', @@ -135,7 +135,7 @@ class NekoHackerIE(InfoExtractor): 'artist': 'Neko Hacker', 'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )', 'track_number': 2, - } + }, }, { 'url': 'https://nekohacker.com/wp-content/uploads/2021/01/進め!むじなカンパニー-instrumental.mp3', @@ -152,7 +152,7 @@ class NekoHackerIE(InfoExtractor): 'artist': 'Neko Hacker', 'track': '進め!むじなカンパニー (instrumental)', 'track_number': 3, - } + }, }, { 'url': 'https://nekohacker.com/wp-content/uploads/2021/01/むじな-de-なじむ-instrumental.mp3', @@ -169,9 +169,9 @@ class NekoHackerIE(InfoExtractor): 'artist': 'Neko Hacker', 'track': 'むじな de なじむ (instrumental)', 'track_number': 4, - } - } - ] + }, + }, + ], }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index b54c12e..a759da2 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -22,12 +22,22 @@ from ..utils import ( class NetEaseMusicBaseIE(InfoExtractor): - _FORMATS = ['bMusic', 'mMusic', 'hMusic'] + # XXX: _extract_formats logic depends on the order of the levels in each tier + _LEVELS = ( + 'standard', # free tier; 标准; 128kbps mp3 or aac + 'higher', # free tier; 192kbps mp3 or aac + 'exhigh', # free tier; 极高 (HQ); 320kbps mp3 or aac + 'lossless', # VIP tier; 无损 (SQ); 48kHz/16bit flac + 'hires', # VIP tier; 高解析度无损 (Hi-Res); 192kHz/24bit flac + 'jyeffect', # VIP tier; 高清臻音 (Spatial Audio); 96kHz/24bit flac + 'jymaster', # SVIP tier; 超清母带 (Master); 192kHz/24bit flac + 'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac + ) _API_BASE = 
'http://music.163.com/api/' _GEO_BYPASS = False @staticmethod - def kilo_or_none(value): + def _kilo_or_none(value): return int_or_none(value, scale=1000) def _create_eapi_cipher(self, api_path, query_body, cookies): @@ -56,7 +66,7 @@ class NetEaseMusicBaseIE(InfoExtractor): 'requestId': f'{int(time.time() * 1000)}_{random.randint(0, 1000):04}', **traverse_obj(self._get_cookies(self._API_BASE), { 'MUSIC_U': ('MUSIC_U', {lambda i: i.value}), - }) + }), } return self._download_json( urljoin('https://interface3.music.163.com/', f'/eapi{path}'), video_id, @@ -66,45 +76,43 @@ class NetEaseMusicBaseIE(InfoExtractor): **headers, }, **kwargs) - def _call_player_api(self, song_id, bitrate): + def _call_player_api(self, song_id, level): return self._download_eapi_json( - '/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate}, - note=f'Downloading song URL info: bitrate {bitrate}') + '/song/enhance/player/url/v1', song_id, + {'ids': f'[{song_id}]', 'level': level, 'encodeType': 'flac'}, + note=f'Downloading song URL info: level {level}') - def extract_formats(self, info): - err = 0 + def _extract_formats(self, info): formats = [] song_id = info['id'] - for song_format in self._FORMATS: - details = info.get(song_format) - if not details: + for level in self._LEVELS: + song = traverse_obj( + self._call_player_api(song_id, level), ('data', lambda _, v: url_or_none(v['url']), any)) + if not song: + break # Media is not available due to removal or geo-restriction + actual_level = song.get('level') + if actual_level and actual_level != level: + if level in ('lossless', 'jymaster'): + break # We've already extracted the highest level of the user's account tier continue - bitrate = int_or_none(details.get('bitrate')) or 999000 - for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))): - song_url = song['url'] - if self._is_valid_url(song_url, info['id'], 'song'): - formats.append({ - 'url': song_url, - 
'format_id': song_format, - 'asr': traverse_obj(details, ('sr', {int_or_none})), - **traverse_obj(song, { - 'ext': ('type', {str}), - 'abr': ('br', {self.kilo_or_none}), - 'filesize': ('size', {int_or_none}), - }), - }) - elif err == 0: - err = traverse_obj(song, ('code', {int})) or 0 - + formats.append({ + 'url': song['url'], + 'format_id': level, + 'vcodec': 'none', + **traverse_obj(song, { + 'ext': ('type', {str}), + 'abr': ('br', {self._kilo_or_none}), + 'filesize': ('size', {int_or_none}), + }), + }) + if not actual_level: + break # Only 1 level is available if API does not return a value (netease:program) if not formats: - if err != 0 and (err < 200 or err >= 400): - raise ExtractorError(f'No media links found (site code {err})', expected=True) - else: - self.raise_geo_restricted( - 'No media links found: probably due to geo restriction.', countries=['CN']) + self.raise_geo_restricted( + 'No media links found; possibly due to geo restriction', countries=['CN']) return formats - def query_api(self, endpoint, video_id, note): + def _query_api(self, endpoint, video_id, note): result = self._download_json( f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE}) code = traverse_obj(result, ('code', {int})) @@ -128,32 +136,29 @@ class NetEaseMusicBaseIE(InfoExtractor): class NetEaseMusicIE(NetEaseMusicBaseIE): IE_NAME = 'netease:song' IE_DESC = '网易云音乐' - _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)' _TESTS = [{ - 'url': 'https://music.163.com/#/song?id=548648087', + 'url': 'https://music.163.com/#/song?id=550136151', 'info_dict': { - 'id': '548648087', + 'id': '550136151', 'ext': 'mp3', - 'title': '戒烟 (Live)', - 'creator': '李荣浩 / 朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊', + 'title': 'It\'s Ok (Live)', + 'creators': 'count:10', 'timestamp': 1522944000, 'upload_date': '20180405', - 'description': 
'md5:3650af9ee22c87e8637cb2dde22a765c', - 'subtitles': {'lyrics': [{'ext': 'lrc'}]}, - "duration": 256, + 'description': 'md5:9fd07059c2ccee3950dc8363429a3135', + 'duration': 197, 'thumbnail': r're:^http.*\.jpg', 'album': '偶像练习生 表演曲目合集', 'average_rating': int, - 'album_artist': '偶像练习生', + 'album_artists': ['偶像练习生'], }, }, { - 'note': 'No lyrics.', 'url': 'http://music.163.com/song?id=17241424', 'info_dict': { 'id': '17241424', 'ext': 'mp3', 'title': 'Opus 28', - 'creator': 'Dustin O\'Halloran', 'upload_date': '20080211', 'timestamp': 1202745600, 'duration': 263, @@ -161,15 +166,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'album': 'Piano Solos Vol. 2', 'album_artist': 'Dustin O\'Halloran', 'average_rating': int, + 'description': '[00:05.00]纯音乐,请欣赏\n', + 'album_artists': ['Dustin O\'Halloran'], + 'creators': ['Dustin O\'Halloran'], + 'subtitles': {'lyrics': [{'ext': 'lrc'}]}, }, }, { 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846', - 'md5': '95826c73ea50b1c288b22180ec9e754d', + 'md5': 'b896be78d8d34bd7bb665b26710913ff', 'info_dict': { 'id': '95670', 'ext': 'mp3', 'title': '国际歌', - 'creator': '马备', 'upload_date': '19911130', 'timestamp': 691516800, 'description': 'md5:1ba2f911a2b0aa398479f595224f2141', @@ -180,6 +188,8 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'average_rating': int, 'album': '红色摇滚', 'album_artist': '侯牧人', + 'creators': ['马备'], + 'album_artists': ['侯牧人'], }, }, { 'url': 'http://music.163.com/#/song?id=32102397', @@ -188,7 +198,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'id': '32102397', 'ext': 'mp3', 'title': 'Bad Blood', - 'creator': 'Taylor Swift / Kendrick Lamar', + 'creators': ['Taylor Swift', 'Kendrick Lamar'], 'upload_date': '20150516', 'timestamp': 1431792000, 'description': 'md5:21535156efb73d6d1c355f95616e285a', @@ -207,7 +217,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'id': '22735043', 'ext': 'mp3', 'title': '소원을 말해봐 (Genie)', - 'creator': '少女时代', + 'creators': 
['少女时代'], 'upload_date': '20100127', 'timestamp': 1264608000, 'description': 'md5:03d1ffebec3139aa4bafe302369269c5', @@ -251,12 +261,12 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): def _real_extract(self, url): song_id = self._match_id(url) - info = self.query_api( + info = self._query_api( f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0] - formats = self.extract_formats(info) + formats = self._extract_formats(info) - lyrics = self._process_lyrics(self.query_api( + lyrics = self._process_lyrics(self._query_api( f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data')) lyric_data = { 'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False), @@ -267,14 +277,14 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): 'id': song_id, 'formats': formats, 'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None, - 'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None, - 'album_artist': ' / '.join(traverse_obj(info, ('album', 'artists', ..., 'name'))) or None, + 'creators': traverse_obj(info, ('artists', ..., 'name')) or None, + 'album_artists': traverse_obj(info, ('album', 'artists', ..., 'name')) or None, **lyric_data, **traverse_obj(info, { 'title': ('name', {str}), - 'timestamp': ('album', 'publishTime', {self.kilo_or_none}), + 'timestamp': ('album', 'publishTime', {self._kilo_or_none}), 'thumbnail': ('album', 'picUrl', {url_or_none}), - 'duration': ('duration', {self.kilo_or_none}), + 'duration': ('duration', {self._kilo_or_none}), 'album': ('album', 'name', {str}), 'average_rating': ('score', {int_or_none}), }), @@ -284,7 +294,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): class NetEaseMusicAlbumIE(NetEaseMusicBaseIE): IE_NAME = 'netease:album' IE_DESC = '网易云音乐 - 专辑' - _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://music\.163\.com/(?:#/)?album\?id=(?P<id>[0-9]+)' _TESTS = [{ 'url': 
'https://music.163.com/#/album?id=133153666', 'info_dict': { @@ -294,7 +304,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE): 'description': '桃几2021年翻唱合集', 'thumbnail': r're:^http.*\.jpg', }, - 'playlist_mincount': 13, + 'playlist_mincount': 12, }, { 'url': 'http://music.163.com/#/album?id=220780', 'info_dict': { @@ -328,7 +338,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE): class NetEaseMusicSingerIE(NetEaseMusicBaseIE): IE_NAME = 'netease:singer' IE_DESC = '网易云音乐 - 歌手' - _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://music\.163\.com/(?:#/)?artist\?id=(?P<id>[0-9]+)' _TESTS = [{ 'note': 'Singer has aliases.', 'url': 'http://music.163.com/#/artist?id=10559', @@ -358,7 +368,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE): def _real_extract(self, url): singer_id = self._match_id(url) - info = self.query_api( + info = self._query_api( f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data') name = join_nonempty( @@ -372,7 +382,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE): class NetEaseMusicListIE(NetEaseMusicBaseIE): IE_NAME = 'netease:playlist' IE_DESC = '网易云音乐 - 歌单' - _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://music\.163\.com/(?:#/)?(?:playlist|discover/toplist)\?id=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://music.163.com/#/playlist?id=79177352', 'info_dict': { @@ -405,11 +415,15 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE): 'url': 'http://music.163.com/#/discover/toplist?id=3733003', 'info_dict': { 'id': '3733003', - 'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}', + 'title': 're:韩国Melon排行榜周榜(?: [0-9]{4}-[0-9]{2}-[0-9]{2})?', 'description': 'md5:73ec782a612711cadc7872d9c1e134fc', + 'upload_date': '20200109', + 'uploader_id': '2937386', + 'tags': ['韩语', '榜单'], + 'uploader': 'Melon榜单', + 'timestamp': 1578569373, }, 'playlist_count': 50, - 'skip': 'Blocked outside Mainland China', }] def 
_real_extract(self, url): @@ -418,7 +432,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE): info = self._download_eapi_json( '/v3/playlist/detail', list_id, {'id': list_id, 't': '-1', 'n': '500', 's': '0'}, - note="Downloading playlist info") + note='Downloading playlist info') metainfo = traverse_obj(info, ('playlist', { 'title': ('name', {str}), @@ -426,7 +440,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE): 'tags': ('tags', ..., {str}), 'uploader': ('creator', 'nickname', {str}), 'uploader_id': ('creator', 'userId', {str_or_none}), - 'timestamp': ('updateTime', {self.kilo_or_none}), + 'timestamp': ('updateTime', {self._kilo_or_none}), })) if traverse_obj(info, ('playlist', 'specialType')) == 10: metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}' @@ -437,7 +451,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE): class NetEaseMusicMvIE(NetEaseMusicBaseIE): IE_NAME = 'netease:mv' IE_DESC = '网易云音乐 - MV' - _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://music\.163\.com/(?:#/)?mv\?id=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'https://music.163.com/#/mv?id=10958064', 'info_dict': { @@ -445,7 +459,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE): 'ext': 'mp4', 'title': '交换余生', 'description': 'md5:e845872cff28820642a2b02eda428fea', - 'creator': '林俊杰', + 'creators': ['林俊杰'], 'upload_date': '20200916', 'thumbnail': r're:http.*\.jpg', 'duration': 364, @@ -460,7 +474,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE): 'ext': 'mp4', 'title': '이럴거면 그러지말지', 'description': '白雅言自作曲唱甜蜜爱情', - 'creator': '白娥娟', + 'creators': ['白娥娟'], 'upload_date': '20150520', 'thumbnail': r're:http.*\.jpg', 'duration': 216, @@ -468,12 +482,28 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE): 'like_count': int, 'comment_count': int, }, + 'skip': 'Blocked outside Mainland China', + }, { + 'note': 'This MV has multiple creators.', + 'url': 'https://music.163.com/#/mv?id=22593543', + 'info_dict': { + 'id': 
'22593543', + 'ext': 'mp4', + 'title': '老北京杀器', + 'creators': ['秃子2z', '辉子', 'Saber梁维嘉'], + 'duration': 206, + 'upload_date': '20240618', + 'like_count': int, + 'comment_count': int, + 'thumbnail': r're:http.*\.jpg', + 'view_count': int, + }, }] def _real_extract(self, url): mv_id = self._match_id(url) - info = self.query_api( + info = self._query_api( f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data'] formats = [ @@ -484,13 +514,13 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE): return { 'id': mv_id, 'formats': formats, + 'creators': traverse_obj(info, ('artists', ..., 'name')) or [info.get('artistName')], **traverse_obj(info, { 'title': ('name', {str}), 'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}), - 'creator': ('artistName', {str}), 'upload_date': ('publishTime', {unified_strdate}), 'thumbnail': ('cover', {url_or_none}), - 'duration': ('duration', {self.kilo_or_none}), + 'duration': ('duration', {self._kilo_or_none}), 'view_count': ('playCount', {int_or_none}), 'like_count': ('likeCount', {int_or_none}), 'comment_count': ('commentCount', {int_or_none}), @@ -501,7 +531,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE): class NetEaseMusicProgramIE(NetEaseMusicBaseIE): IE_NAME = 'netease:program' IE_DESC = '网易云音乐 - 电台节目' - _VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://music\.163\.com/(?:#/)?program\?id=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://music.163.com/#/program?id=10109055', 'info_dict': { @@ -509,7 +539,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE): 'ext': 'mp3', 'title': '不丹足球背后的故事', 'description': '喜马拉雅人的足球梦 ...', - 'creator': '大话西藏', + 'creators': ['大话西藏'], 'timestamp': 1434179287, 'upload_date': '20150613', 'thumbnail': r're:http.*\.jpg', @@ -522,7 +552,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE): 'id': '10141022', 'title': '滚滚电台的有声节目', 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b', - 'creator': '滚滚电台ORZ', + 'creators': ['滚滚电台ORZ'], 
'timestamp': 1434450733, 'upload_date': '20150616', 'thumbnail': r're:http.*\.jpg', @@ -536,21 +566,21 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE): 'ext': 'mp3', 'title': '滚滚电台的有声节目', 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b', - 'creator': '滚滚电台ORZ', + 'creators': ['滚滚电台ORZ'], 'timestamp': 1434450733, 'upload_date': '20150616', 'thumbnail': r're:http.*\.jpg', 'duration': 1104, }, 'params': { - 'noplaylist': True + 'noplaylist': True, }, }] def _real_extract(self, url): program_id = self._match_id(url) - info = self.query_api( + info = self._query_api( f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program'] metainfo = traverse_obj(info, { @@ -558,17 +588,17 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE): 'description': ('description', {str}), 'creator': ('dj', 'brand', {str}), 'thumbnail': ('coverUrl', {url_or_none}), - 'timestamp': ('createTime', {self.kilo_or_none}), + 'timestamp': ('createTime', {self._kilo_or_none}), }) if not self._yes_playlist( info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'): - formats = self.extract_formats(info['mainSong']) + formats = self._extract_formats(info['mainSong']) return { 'id': str(info['mainSong']['id']), 'formats': formats, - 'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})), + 'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})), **metainfo, } @@ -579,13 +609,13 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE): class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE): IE_NAME = 'netease:djradio' IE_DESC = '网易云音乐 - 电台' - _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)' + _VALID_URL = r'https?://music\.163\.com/(?:#/)?djradio\?id=(?P<id>[0-9]+)' _TEST = { 'url': 'http://music.163.com/#/djradio?id=42', 'info_dict': { 'id': '42', 'title': '声音蔓延', - 'description': 'md5:c7381ebd7989f9f367668a5aee7d5f08' + 'description': 
'md5:c7381ebd7989f9f367668a5aee7d5f08', }, 'playlist_mincount': 40, } @@ -597,7 +627,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE): metainfo = {} entries = [] for offset in itertools.count(start=0, step=self._PAGE_SIZE): - info = self.query_api( + info = self._query_api( f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}', dj_id, note=f'Downloading dj programs - {offset}') diff --git a/yt_dlp/extractor/netverse.py b/yt_dlp/extractor/netverse.py index ef53e15..2ddec5c 100644 --- a/yt_dlp/extractor/netverse.py +++ b/yt_dlp/extractor/netverse.py @@ -63,7 +63,7 @@ class NetverseIE(NetverseBaseIE): 'timestamp': 1626919804, 'like_count': int, 'uploader': 'Net Prime', - } + }, }, { # series 'url': 'https://www.netverse.id/watch/jadoo-seorang-model', @@ -87,7 +87,7 @@ class NetverseIE(NetverseBaseIE): 'uploader': 'Net Prime', 'age_limit': 0, }, - 'skip': 'video get Geo-blocked for some country' + 'skip': 'video get Geo-blocked for some country', }, { # non www host 'url': 'https://netverse.id/watch/tetangga-baru', @@ -135,7 +135,7 @@ class NetverseIE(NetverseBaseIE): 'timestamp': 1645764984, 'upload_date': '20220225', }, - 'skip': 'This video get Geo-blocked for some country' + 'skip': 'This video get Geo-blocked for some country', }, { # video with comments 'url': 'https://netverse.id/video/episode-1-season-2016-ok-food', @@ -161,8 +161,8 @@ class NetverseIE(NetverseBaseIE): 'comment_count': int, }, 'params': { - 'getcomments': True - } + 'getcomments': True, + }, }, { # video with multiple page comment 'url': 'https://netverse.id/video/match-island-eps-1-fix', @@ -188,8 +188,8 @@ class NetverseIE(NetverseBaseIE): 'comment_count': int, }, 'params': { - 'getcomments': True - } + 'getcomments': True, + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/netzkino.py b/yt_dlp/extractor/netzkino.py index e9422ee..c07b171 100644 --- a/yt_dlp/extractor/netzkino.py +++ b/yt_dlp/extractor/netzkino.py @@ -26,7 +26,7 @@ class 
NetzkinoIE(InfoExtractor): }, 'params': { 'skip_download': 'Download only works from Germany', - } + }, }, { 'url': 'https://www.netzkino.de/#!/filme/dr-jekyll-mrs-hyde-2', 'md5': 'c7728b2dadd04ff6727814847a51ef03', @@ -42,14 +42,14 @@ class NetzkinoIE(InfoExtractor): }, 'params': { 'skip_download': 'Download only works from Germany', - } + }, }] def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') - api_url = 'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/%s.json?d=www' % video_id + api_url = f'https://api.netzkino.de.simplecache.net/capi-2.0a/movies/{video_id}.json?d=www' info = self._download_json(api_url, video_id) custom_fields = info['custom_fields'] diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py index 67e52ef..9f5a464 100644 --- a/yt_dlp/extractor/newgrounds.py +++ b/yt_dlp/extractor/newgrounds.py @@ -86,7 +86,7 @@ class NewgroundsIE(InfoExtractor): 'description': 'md5:9246c181614e23754571995104da92e0', 'age_limit': 13, 'thumbnail': r're:^https://picon\.ngfiles\.com/297000/flash_297383_card\.png', - } + }, }, { 'url': 'https://www.newgrounds.com/portal/view/297383/format/flash', 'md5': '5d05585a9a0caca059f5abfbd3865524', @@ -101,7 +101,7 @@ class NewgroundsIE(InfoExtractor): 'view_count': int, 'age_limit': 13, 'thumbnail': r're:^https://picon\.ngfiles\.com/297000/flash_297383_card\.png', - } + }, }, { 'url': 'https://www.newgrounds.com/portal/view/823109', 'info_dict': { @@ -115,7 +115,7 @@ class NewgroundsIE(InfoExtractor): 'view_count': int, 'age_limit': 18, 'thumbnail': r're:^https://picon\.ngfiles\.com/823000/flash_823109_card\.png', - } + }, }] _AGE_LIMIT = { 'e': 0, @@ -132,7 +132,7 @@ class NewgroundsIE(InfoExtractor): result = self._download_json(login_url, None, 'Logging in', headers={ 'Accept': 'application/json', 'Referer': self._LOGIN_URL, - 'X-Requested-With': 'XMLHttpRequest' + 'X-Requested-With': 'XMLHttpRequest', }, data=urlencode_postdata({ 
**self._hidden_inputs(login_webpage), 'username': username, @@ -164,7 +164,7 @@ class NewgroundsIE(InfoExtractor): json_video = self._download_json(f'https://www.newgrounds.com/portal/video/{media_id}', media_id, headers={ 'Accept': 'application/json', 'Referer': url, - 'X-Requested-With': 'XMLHttpRequest' + 'X-Requested-With': 'XMLHttpRequest', }) formats = [] diff --git a/yt_dlp/extractor/newsy.py b/yt_dlp/extractor/newsy.py index a5a7b16..941cb93 100644 --- a/yt_dlp/extractor/newsy.py +++ b/yt_dlp/extractor/newsy.py @@ -19,9 +19,9 @@ class NewsyIE(InfoExtractor): 'timestamp': 1621339200, 'duration': 339630, 'thumbnail': 'https://cdn.newsy.com/images/videos/x/1620927824_xyrrP4.jpg', - 'upload_date': '20210518' + 'upload_date': '20210518', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/nextmedia.py b/yt_dlp/extractor/nextmedia.py index 871d3e6..81da3ff 100644 --- a/yt_dlp/extractor/nextmedia.py +++ b/yt_dlp/extractor/nextmedia.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( clean_html, get_element_by_class, @@ -24,7 +25,7 @@ class NextMediaIE(InfoExtractor): 'description': 'md5:28222b9912b6665a21011b034c70fcc7', 'timestamp': 1415456273, 'upload_date': '20141108', - } + }, }] _URL_PATTERN = r'\{ url: \'(.+)\' \}' @@ -39,7 +40,7 @@ class NextMediaIE(InfoExtractor): r'window\.location\.href\s*=\s*([\'"])(?P<url>(?!\1).+)\1', page, 'redirection URL', default=None, group='url') if redirection_url: - return self.url_result(compat_urlparse.urljoin(url, redirection_url)) + return self.url_result(urllib.parse.urljoin(url, redirection_url)) title = self._fetch_title(page) video_url = self._search_regex(self._URL_PATTERN, page, 'video url') @@ -67,8 +68,8 @@ class NextMediaIE(InfoExtractor): return self._og_search_thumbnail(page) def _fetch_timestamp(self, page): - dateCreated = 
self._search_regex('"dateCreated":"([^"]+)"', page, 'created time') - return parse_iso8601(dateCreated) + date_created = self._search_regex('"dateCreated":"([^"]+)"', page, 'created time') + return parse_iso8601(date_created) def _fetch_upload_date(self, url): return self._search_regex(self._VALID_URL, url, 'upload date', group='date') @@ -91,7 +92,7 @@ class NextMediaActionNewsIE(NextMediaIE): # XXX: Do not subclass from concrete 'description': 'md5:cd802fad1f40fd9ea178c1e2af02d659', 'timestamp': 1421791200, 'upload_date': '20150120', - } + }, }] def _real_extract(self, url): @@ -115,7 +116,7 @@ class AppleDailyIE(NextMediaIE): # XXX: Do not subclass from concrete IE 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:2acd430e59956dc47cd7f67cb3c003f4', 'upload_date': '20150128', - } + }, }, { 'url': 'http://www.appledaily.com.tw/realtimenews/article/strange/20150128/550549/%E4%B8%8D%E6%BB%BF%E8%A2%AB%E8%B8%A9%E8%85%B3%E3%80%80%E5%B1%B1%E6%9D%B1%E5%85%A9%E5%A4%A7%E5%AA%BD%E4%B8%80%E8%B7%AF%E6%89%93%E4%B8%8B%E8%BB%8A', 'md5': '86b4e9132d158279c7883822d94ccc49', @@ -126,7 +127,7 @@ class AppleDailyIE(NextMediaIE): # XXX: Do not subclass from concrete IE 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:175b4260c1d7c085993474217e4ab1b4', 'upload_date': '20150128', - } + }, }, { 'url': 'http://www.appledaily.com.tw/animation/realtimenews/new/20150128/5003671', 'md5': '03df296d95dedc2d5886debbb80cb43f', diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py index b4874c8..cd32892 100644 --- a/yt_dlp/extractor/nexx.py +++ b/yt_dlp/extractor/nexx.py @@ -4,7 +4,6 @@ import re import time from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ -41,7 +40,7 @@ class NexxIE(InfoExtractor): 'timestamp': 1384264416, 'upload_date': '20131112', }, - 'skip': 'Spiegel nexx CDNs are now disabled' + 'skip': 'Spiegel nexx CDNs are now disabled', }, { # episode with captions 'url': 
'https://api.nexx.cloud/v3.1/741/videos/byid/1701834', @@ -92,7 +91,7 @@ class NexxIE(InfoExtractor): 'timestamp': 1527874460, 'upload_date': '20180601', }, - 'skip': 'Spiegel nexx CDNs are now disabled' + 'skip': 'Spiegel nexx CDNs are now disabled', }, { 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907', 'only_matching': True, @@ -128,8 +127,7 @@ class NexxIE(InfoExtractor): r'(?is)onPLAYReady.+?_play\.(?:init|(?:control\.)?addPlayer)\s*\(.+?\s*,\s*["\']?(\d+)', webpage): entries.append( - 'https://api.nexx.cloud/v3/%s/videos/byid/%s' - % (domain_id, video_id)) + f'https://api.nexx.cloud/v3/{domain_id}/videos/byid/{video_id}') # TODO: support more embed formats @@ -137,20 +135,20 @@ class NexxIE(InfoExtractor): def _handle_error(self, response): if traverse_obj(response, ('metadata', 'notice'), expected_type=str): - self.report_warning('%s said: %s' % (self.IE_NAME, response['metadata']['notice'])) + self.report_warning('{} said: {}'.format(self.IE_NAME, response['metadata']['notice'])) status = int_or_none(try_get( response, lambda x: x['metadata']['status']) or 200) if 200 <= status < 300: return raise ExtractorError( - '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']), + '{} said: {}'.format(self.IE_NAME, response['metadata']['errorhint']), expected=True) def _call_api(self, domain_id, path, video_id, data=None, headers={}): headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8' result = self._download_json( - 'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id, - 'Downloading %s JSON' % path, data=urlencode_postdata(data), + f'https://api.nexx.cloud/v3/{domain_id}/{path}', video_id, + f'Downloading {path} JSON', data=urlencode_postdata(data), headers=headers) self._handle_error(result) return result['result'] @@ -160,20 +158,20 @@ class NexxIE(InfoExtractor): cdn = stream_data['cdnType'] assert cdn == 'free' - hash = video['general']['hash'] + video_hash = video['general']['hash'] - ps = 
compat_str(stream_data['originalDomain']) + ps = str(stream_data['originalDomain']) if stream_data['applyFolderHierarchy'] == 1: s = ('%04d' % int(video_id))[::-1] - ps += '/%s/%s' % (s[0:2], s[2:4]) - ps += '/%s/%s_' % (video_id, hash) + ps += f'/{s[0:2]}/{s[2:4]}' + ps += f'/{video_id}/{video_hash}_' t = 'http://%s' + ps fd = stream_data['azureFileDistribution'].split(',') cdn_provider = stream_data['cdnProvider'] def p0(p): - return '_%s' % p if stream_data['applyAzureStructure'] == 1 else '' + return f'_{p}' if stream_data['applyAzureStructure'] == 1 else '' formats = [] if cdn_provider == 'ak': @@ -191,10 +189,10 @@ class NexxIE(InfoExtractor): for i in fd: p = i.split(':') tbr = int(p[0]) - filename = '%s%s%s.mp4' % (h, p[1], p0(tbr)) + filename = f'{h}{p[1]}{p0(tbr)}.mp4' f = { 'url': http_base + '/' + filename, - 'format_id': '%s-http-%d' % (cdn, tbr), + 'format_id': f'{cdn}-http-{tbr}', 'tbr': tbr, } width_height = p[1].split('x') @@ -204,7 +202,7 @@ class NexxIE(InfoExtractor): 'height': int_or_none(width_height[1]), }) formats.append(f) - a = filename + ':%s' % (tbr * 1000) + a = filename + f':{tbr * 1000}' t += a + ',' t = t[:-1] + '&audiostream=' + a.split(':')[0] else: @@ -213,10 +211,10 @@ class NexxIE(InfoExtractor): if cdn_provider == 'ce': formats.extend(self._extract_mpd_formats( t % (stream_data['cdnPathDASH'], 'mpd'), video_id, - mpd_id='%s-dash' % cdn, fatal=False)) + mpd_id=f'{cdn}-dash', fatal=False)) formats.extend(self._extract_m3u8_formats( t % (stream_data['cdnPathHLS'], 'm3u8'), video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False)) + entry_protocol='m3u8_native', m3u8_id=f'{cdn}-hls', fatal=False)) return formats @@ -231,9 +229,9 @@ class NexxIE(InfoExtractor): def get_cdn_shield_base(shield_type=''): for secure in ('', 's'): - cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper())) + cdn_shield = stream_data.get(f'cdnShield{shield_type}HTTP{secure.upper()}') if cdn_shield: - 
return 'http%s://%s' % (secure, cdn_shield) + return f'http{secure}://{cdn_shield}' return f'http://sdn-global-{"prog" if shield_type.lower() == "prog" else "streaming"}-cache.3qsdn.com/' + (f's/{protection_key}/' if protection_key else '') stream_base = get_cdn_shield_base() @@ -256,7 +254,7 @@ class NexxIE(InfoExtractor): tbr = int_or_none(ss[1], scale=1000) formats.append({ 'url': f'{progressive_base}{q_acc}/uploads/{q_acc}-{ss[2]}.webm', - 'format_id': f'{cdn}-{ss[0]}{"-%s" % tbr if tbr else ""}', + 'format_id': f'{cdn}-{ss[0]}{f"-{tbr}" if tbr else ""}', 'tbr': tbr, }) @@ -270,7 +268,7 @@ class NexxIE(InfoExtractor): width, height = ss[1].split('x') if len(ss[1].split('x')) == 2 else (None, None) f = { 'url': f'{progressive_base}{q_acc}/files/{q_prefix}/{q_locator}/{ss[2]}.mp4', - 'format_id': f'{cdn}-http-{"-%s" % tbr if tbr else ""}', + 'format_id': f'{cdn}-http-{f"-{tbr}" if tbr else ""}', 'tbr': tbr, 'width': int_or_none(width), 'height': int_or_none(height), @@ -288,38 +286,37 @@ class NexxIE(InfoExtractor): def get_cdn_shield_base(shield_type='', static=False): for secure in ('', 's'): - cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper())) + cdn_shield = stream_data.get(f'cdnShield{shield_type}HTTP{secure.upper()}') if cdn_shield: - return 'http%s://%s' % (secure, cdn_shield) + return f'http{secure}://{cdn_shield}' + if 'fb' in stream_data['azureAccount']: + prefix = 'df' if static else 'f' else: - if 'fb' in stream_data['azureAccount']: - prefix = 'df' if static else 'f' - else: - prefix = 'd' if static else 'p' - account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', '')) - return 'http://nx-%s%02d.akamaized.net/' % (prefix, account) + prefix = 'd' if static else 'p' + account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', '')) + return 'http://nx-%s%02d.akamaized.net/' % (prefix, account) language = video['general'].get('language_raw') or '' 
azure_stream_base = get_cdn_shield_base() is_ml = ',' in language - azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % ( + azure_manifest_url = '{}{}/{}_src{}.ism/Manifest'.format( azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s' protection_token = try_get( - video, lambda x: x['protectiondata']['token'], compat_str) + video, lambda x: x['protectiondata']['token'], str) if protection_token: - azure_manifest_url += '?hdnts=%s' % protection_token + azure_manifest_url += f'?hdnts={protection_token}' formats = self._extract_m3u8_formats( azure_manifest_url % '(format=m3u8-aapl)', video_id, 'mp4', 'm3u8_native', - m3u8_id='%s-hls' % cdn, fatal=False) + m3u8_id=f'{cdn}-hls', fatal=False) formats.extend(self._extract_mpd_formats( azure_manifest_url % '(format=mpd-time-csf)', - video_id, mpd_id='%s-dash' % cdn, fatal=False)) + video_id, mpd_id=f'{cdn}-dash', fatal=False)) formats.extend(self._extract_ism_formats( - azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False)) + azure_manifest_url % '', video_id, ism_id=f'{cdn}-mss', fatal=False)) azure_progressive_base = get_cdn_shield_base('Prog', True) azure_file_distribution = stream_data.get('azureFileDistribution') @@ -332,9 +329,8 @@ class NexxIE(InfoExtractor): tbr = int_or_none(ss[0]) if tbr: f = { - 'url': '%s%s/%s_src_%s_%d.mp4' % ( - azure_progressive_base, azure_locator, video_id, ss[1], tbr), - 'format_id': '%s-http-%d' % (cdn, tbr), + 'url': f'{azure_progressive_base}{azure_locator}/{video_id}_src_{ss[1]}_{tbr}.mp4', + 'format_id': f'{cdn}-http-{tbr}', 'tbr': tbr, } width_height = ss[1].split('x') @@ -365,7 +361,7 @@ class NexxIE(InfoExtractor): return None response = self._download_json( - 'https://arc.nexx.cloud/api/video/%s.json' % video_id, + f'https://arc.nexx.cloud/api/video/{video_id}.json', video_id, fatal=False) if response and isinstance(response, dict): result = response.get('result') @@ -375,9 +371,7 @@ class NexxIE(InfoExtractor): # not all videos work 
via arc, e.g. nexx:741:1269984 if not video: # Reverse engineered from JS code (see getDeviceID function) - device_id = '%d:%d:%d%d' % ( - random.randint(1, 4), int(time.time()), - random.randint(1e4, 99999), random.randint(1, 9)) + device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(1e4, 99999)}{random.randint(1, 9)}' result = self._call_api(domain_id, 'session/init', video_id, data={ 'nxp_devh': device_id, @@ -416,10 +410,10 @@ class NexxIE(InfoExtractor): # Reversed from JS code for _play.api.call function (search for # X-Request-Token) request_token = hashlib.md5( - ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest() + ''.join((op, domain_id, secret)).encode()).hexdigest() result = self._call_api( - domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={ + domain_id, f'videos/{op}/{video_id}', video_id, data={ 'additionalfields': 'language,channel,format,licenseby,slug,fileversion,episode,season', 'addInteractionOptions': '1', 'addStatusDetails': '1', @@ -460,13 +454,13 @@ class NexxIE(InfoExtractor): 'data': '\n\n'.join( f'{i + 1}\n{srt_subtitles_timecode(line["fromms"] / 1000)} --> {srt_subtitles_timecode(line["toms"] / 1000)}\n{line["caption"]}' for i, line in enumerate(sub['data'])), - 'name': sub.get('language_long') or sub.get('title') + 'name': sub.get('language_long') or sub.get('title'), }) elif sub.get('url'): subtitles.setdefault(sub.get('language', 'en'), []).append({ 'url': sub['url'], 'ext': sub.get('format'), - 'name': sub.get('language_long') or sub.get('title') + 'name': sub.get('language_long') or sub.get('title'), }) return { @@ -477,7 +471,7 @@ class NexxIE(InfoExtractor): 'release_year': int_or_none(general.get('year')), 'creator': general.get('studio') or general.get('studio_adref') or None, 'thumbnail': try_get( - video, lambda x: x['imagedata']['thumb'], compat_str), + video, lambda x: x['imagedata']['thumb'], str), 'duration': parse_duration(general.get('runtime')), 'timestamp': 
int_or_none(general.get('uploaded')), 'episode_number': traverse_obj( diff --git a/yt_dlp/extractor/nfhsnetwork.py b/yt_dlp/extractor/nfhsnetwork.py index be732a3..ec746ec 100644 --- a/yt_dlp/extractor/nfhsnetwork.py +++ b/yt_dlp/extractor/nfhsnetwork.py @@ -17,12 +17,12 @@ class NFHSNetworkIE(InfoExtractor): 'uploader_url': 'https://www.nfhsnetwork.com/schools/rockford-high-school-rockford-mi', 'location': 'Rockford, Michigan', 'timestamp': 1616859000, - 'upload_date': '20210327' + 'upload_date': '20210327', }, 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # Non-sport activity with description 'url': 'https://www.nfhsnetwork.com/events/limon-high-school-limon-co/evt4a30e3726c', @@ -36,12 +36,12 @@ class NFHSNetworkIE(InfoExtractor): 'uploader_url': 'https://www.nfhsnetwork.com/schools/limon-high-school-limon-co', 'location': 'Limon, Colorado', 'timestamp': 1607893200, - 'upload_date': '20201213' + 'upload_date': '20201213', }, 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # Postseason game 'url': 'https://www.nfhsnetwork.com/events/nfhs-network-special-events/dd8de71d45', @@ -54,12 +54,12 @@ class NFHSNetworkIE(InfoExtractor): 'uploader_url': 'https://www.nfhsnetwork.com/affiliates/socal-sports-productions', 'location': 'San Diego, California', 'timestamp': 1451187000, - 'upload_date': '20151226' + 'upload_date': '20151226', }, 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # Video with no broadcasts object 'url': 'https://www.nfhsnetwork.com/events/wiaa-wi/9aa2f92f82', @@ -73,13 +73,13 @@ class NFHSNetworkIE(InfoExtractor): 'uploader_url': 'https://www.nfhsnetwork.com/associations/wiaa-wi', 'location': 'Stevens Point, Wisconsin', 'timestamp': 1421856000, - 'upload_date': '20150121' + 'upload_date': '20150121', }, 'params': { # m3u8 download 'skip_download': True, - } - } + }, + }, ] def _real_extract(self, url): @@ -91,17 +91,17 @@ class NFHSNetworkIE(InfoExtractor): publisher = data.get('publishers')[0] # 
always exists broadcast = (publisher.get('broadcasts') or publisher.get('vods'))[0] # some (older) videos don't have a broadcasts object uploader = publisher.get('formatted_name') or publisher.get('name') - uploaderID = publisher.get('publisher_key') - pubType = publisher.get('type') - uploaderPrefix = ( - "schools" if pubType == "school" - else "associations" if "association" in pubType - else "affiliates" if (pubType == "publisher" or pubType == "affiliate") - else "schools") - uploaderPage = 'https://www.nfhsnetwork.com/%s/%s' % (uploaderPrefix, publisher.get('slug')) - location = '%s, %s' % (data.get('city'), data.get('state_name')) + uploader_id = publisher.get('publisher_key') + pub_type = publisher.get('type') + uploader_prefix = ( + 'schools' if pub_type == 'school' + else 'associations' if 'association' in pub_type + else 'affiliates' if (pub_type == 'publisher' or pub_type == 'affiliate') + else 'schools') + uploader_page = 'https://www.nfhsnetwork.com/{}/{}'.format(uploader_prefix, publisher.get('slug')) + location = '{}, {}'.format(data.get('city'), data.get('state_name')) description = broadcast.get('description') - isLive = broadcast.get('on_air') or broadcast.get('status') == 'on_air' or False + is_live = broadcast.get('on_air') or broadcast.get('status') == 'on_air' or False timestamp = unified_timestamp(data.get('local_start_time')) upload_date = unified_strdate(data.get('local_start_time')) @@ -111,13 +111,13 @@ class NFHSNetworkIE(InfoExtractor): or self._html_search_regex(r'<h1 class="sr-hidden">(.*?)</h1>', webpage, 'title')) title = title.split('|')[0].strip() - video_type = 'broadcasts' if isLive else 'vods' - key = broadcast.get('key') if isLive else try_get(publisher, lambda x: x['vods'][0]['key']) + video_type = 'broadcasts' if is_live else 'vods' + key = broadcast.get('key') if is_live else try_get(publisher, lambda x: x['vods'][0]['key']) m3u8_url = self._download_json( - 'https://cfunity.nfhsnetwork.com/v2/%s/%s/url' % (video_type, 
key), + f'https://cfunity.nfhsnetwork.com/v2/{video_type}/{key}/url', video_id).get('video_url') - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=isLive) + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', live=is_live) return { 'id': video_id, @@ -126,10 +126,10 @@ class NFHSNetworkIE(InfoExtractor): 'description': description, 'timestamp': timestamp, 'uploader': uploader, - 'uploader_id': uploaderID, - 'uploader_url': uploaderPage, + 'uploader_id': uploader_id, + 'uploader_url': uploader_page, 'location': location, 'upload_date': upload_date, - 'is_live': isLive, + 'is_live': is_live, '_format_sort_fields': ('res', 'tbr'), } diff --git a/yt_dlp/extractor/nfl.py b/yt_dlp/extractor/nfl.py index 3f83cd2..c537c1c 100644 --- a/yt_dlp/extractor/nfl.py +++ b/yt_dlp/extractor/nfl.py @@ -195,7 +195,7 @@ class NFLIE(NFLBaseIE): 'tags': 'count:6', 'duration': 157, 'categories': 'count:3', - } + }, }, { 'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown', 'md5': '6886b32c24b463038c760ceb55a34566', @@ -332,7 +332,7 @@ class NFLPlusReplayIE(NFLBaseIE): def entries(): for replay in traverse_obj( - replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types) + replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types), ): video_id = replay['mcpPlaybackId'] yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index 8bb017a..0bd6edf 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, clean_html, + filter_dict, get_element_by_class, int_or_none, join_nonempty, @@ -104,7 +105,7 @@ class NhkBaseIE(InfoExtractor): if not img_path: continue thumbnails.append({ - 'id': '%dp' % h, + 'id': f'{h}p', 'height': h, 'width': w, 'url': 'https://www3.nhk.or.jp' 
+ img_path, @@ -211,7 +212,7 @@ class NhkVodIE(NhkBaseIE): 'series': 'Living in Japan', 'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab', 'thumbnail': r're:https://.+/.+\.jpg', - 'episode': 'Tips for Travelers to Japan / Ramen Vending Machines' + 'episode': 'Tips for Travelers to Japan / Ramen Vending Machines', }, }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', @@ -458,7 +459,7 @@ class NhkForSchoolBangumiIE(InfoExtractor): 'upload_date': '20140402', 'ext': 'mp4', - 'chapters': 'count:12' + 'chapters': 'count:12', }, 'params': { # m3u8 download @@ -521,7 +522,8 @@ class NhkForSchoolSubjectIE(InfoExtractor): 'eigo', 'tokkatsu', 'tokushi', 'sonota', ) - _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>%s)/?(?:[\?#].*)?$' % '|'.join(re.escape(s) for s in KNOWN_SUBJECTS) + _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>{})/?(?:[\?#].*)?$'.format( + '|'.join(re.escape(s) for s in KNOWN_SUBJECTS)) _TESTS = [{ 'url': 'https://www.nhk.or.jp/school/sougou/', @@ -551,9 +553,8 @@ class NhkForSchoolSubjectIE(InfoExtractor): class NhkForSchoolProgramListIE(InfoExtractor): - _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>(?:%s)/[a-zA-Z0-9_-]+)' % ( - '|'.join(re.escape(s) for s in NhkForSchoolSubjectIE.KNOWN_SUBJECTS) - ) + _VALID_URL = r'https?://www\.nhk\.or\.jp/school/(?P<id>(?:{})/[a-zA-Z0-9_-]+)'.format( + '|'.join(re.escape(s) for s in NhkForSchoolSubjectIE.KNOWN_SUBJECTS)) _TESTS = [{ 'url': 'https://www.nhk.or.jp/school/sougou/q/', 'info_dict': { @@ -590,21 +591,22 @@ class NhkRadiruIE(InfoExtractor): IE_DESC = 'NHK らじる (Radiru/Rajiru)' _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?' 
_TESTS = [{ - 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210', - 'skip': 'Episode expired on 2024-02-24', + 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_4003239', + 'skip': 'Episode expired on 2024-06-09', 'info_dict': { - 'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス', - 'id': '0449_01_3926210', + 'title': 'ジャズ・トゥナイト ジャズ「Night and Day」特集', + 'id': '0449_01_4003239', 'ext': 'm4a', + 'uploader': 'NHK FM 東京', + 'description': 'md5:ad05f3c3f3f6e99b2e69f9b5e49551dc', 'series': 'ジャズ・トゥナイト', - 'uploader': 'NHK-FM', - 'channel': 'NHK-FM', + 'channel': 'NHK FM 東京', 'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg', - 'release_date': '20240217', - 'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811', - 'timestamp': 1708185600, - 'release_timestamp': 1708178400, - 'upload_date': '20240217', + 'upload_date': '20240601', + 'series_id': '0449_01', + 'release_date': '20240601', + 'timestamp': 1717257600, + 'release_timestamp': 1717250400, }, }, { # playlist, airs every weekday so it should _hopefully_ be okay forever @@ -613,71 +615,145 @@ class NhkRadiruIE(InfoExtractor): 'id': '0458_01', 'title': 'ベストオブクラシック', 'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。', - 'channel': 'NHK-FM', - 'uploader': 'NHK-FM', 'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg', + 'series_id': '0458_01', + 'uploader': 'NHK FM', + 'channel': 'NHK FM', + 'series': 'ベストオブクラシック', }, 'playlist_mincount': 3, }, { # one with letters in the id - 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F300_06_3738470', - 'note': 'Expires on 2024-03-31', + 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F683_01_3910688', + 'note': 'Expires on 2025-03-31', 'info_dict': { - 'id': 'F300_06_3738470', + 'id': 'F683_01_3910688', 'ext': 'm4a', - 'title': '有島武郎「一房のぶどう」', - 'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n(2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より)', - 'channel': 'NHKラジオ第1、NHK-FM', - 'uploader': 'NHKラジオ第1、NHK-FM', - 'timestamp': 
1635757200, - 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg', - 'release_date': '20161207', - 'series': 'らじる文庫 by ラジオ深夜便 ', - 'release_timestamp': 1481126700, - 'upload_date': '20211101', + 'title': '夏目漱石「文鳥」第1回', + 'series': '【らじる文庫】夏目漱石「文鳥」(全4回)', + 'series_id': 'F683_01', + 'description': '朗読:浅井理アナウンサー', + 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F683/img/roudoku_05_rod_640.jpg', + 'upload_date': '20240106', + 'release_date': '20240106', + 'uploader': 'NHK R1', + 'release_timestamp': 1704511800, + 'channel': 'NHK R1', + 'timestamp': 1704512700, }, - 'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'], + 'expected_warnings': ['Unable to download JSON metadata', + 'Failed to get extended metadata. API returned Error 1: Invalid parameters'], }, { # news - 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109', - 'skip': 'Expires on 2023-04-17', + 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_4012173', 'info_dict': { - 'id': 'F261_01_3855109', + 'id': 'F261_01_4012173', 'ext': 'm4a', 'channel': 'NHKラジオ第1', 'uploader': 'NHKラジオ第1', - 'timestamp': 1681635900, - 'release_date': '20230416', 'series': 'NHKラジオニュース', - 'title': '午後6時のNHKニュース', + 'title': '午前0時のNHKニュース', 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg', - 'upload_date': '20230416', - 'release_timestamp': 1681635600, + 'release_timestamp': 1718290800, + 'release_date': '20240613', + 'timestamp': 1718291400, + 'upload_date': '20240613', }, + }, { + # fallback when extended metadata fails + 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=2834_01_4009298', + 'skip': 'Expires on 2024-06-07', + 'info_dict': { + 'id': '2834_01_4009298', + 'title': 'まち☆キラ!開成町特集', + 'ext': 'm4a', + 'release_date': '20240531', + 'upload_date': '20240531', + 'series': 'はま☆キラ!', + 'thumbnail': 'https://www.nhk.or.jp/prog/img/2834/g2834.jpg', + 
'channel': 'NHK R1,FM', + 'description': '', + 'timestamp': 1717123800, + 'uploader': 'NHK R1,FM', + 'release_timestamp': 1717120800, + 'series_id': '2834_01', + }, + 'expected_warnings': ['Failed to get extended metadata. API returned empty list.'], }] _API_URL_TMPL = None - def _extract_extended_description(self, episode_id, episode): - service, _, area = traverse_obj(episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')})) - aa_vinfo3 = traverse_obj(episode, ('aa_vinfo3', {str})) + def _extract_extended_metadata(self, episode_id, aa_vinfo): + service, _, area = traverse_obj(aa_vinfo, (2, {str}, {lambda x: (x or '').partition(',')})) detail_url = try_call( - lambda: self._API_URL_TMPL.format(service=service, area=area, dateid=aa_vinfo3)) + lambda: self._API_URL_TMPL.format(area=area, service=service, dateid=aa_vinfo[3])) if not detail_url: - return + return {} + + response = self._download_json( + detail_url, episode_id, 'Downloading extended metadata', + 'Failed to download extended metadata', fatal=False, expected_status=400) + if not response: + return {} + + if error := traverse_obj(response, ('error', {dict})): + self.report_warning( + 'Failed to get extended metadata. API returned ' + f'Error {join_nonempty("code", "message", from_dict=error, delim=": ")}') + return {} + + full_meta = traverse_obj(response, ('list', service, 0, {dict})) + if not full_meta: + self.report_warning('Failed to get extended metadata. 
API returned empty list.') + return {} + + station = ' '.join(traverse_obj(full_meta, (('service', 'area'), 'name', {str}))) or None + thumbnails = [{ + 'id': str(id_), + 'preference': 1 if id_.startswith('thumbnail') else -2 if id_.startswith('logo') else -1, + **traverse_obj(thumb, { + 'url': 'url', + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + } for id_, thumb in traverse_obj(full_meta, ('images', {dict.items}, lambda _, v: v[1]['url']))] + + return filter_dict({ + 'channel': station, + 'uploader': station, + 'description': join_nonempty( + 'subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta), + 'thumbnails': thumbnails, + **traverse_obj(full_meta, { + 'title': ('title', {str}), + 'timestamp': ('end_time', {unified_timestamp}), + 'release_timestamp': ('start_time', {unified_timestamp}), + }), + }) + + def _extract_episode_info(self, episode, programme_id, series_meta): + episode_id = f'{programme_id}_{episode["id"]}' + aa_vinfo = traverse_obj(episode, ('aa_contents_id', {lambda x: x.split(';')})) + extended_metadata = self._extract_extended_metadata(episode_id, aa_vinfo) + fallback_start_time, _, fallback_end_time = traverse_obj( + aa_vinfo, (4, {str}, {lambda x: (x or '').partition('_')})) - full_meta = traverse_obj( - self._download_json(detail_url, episode_id, 'Downloading extended metadata', fatal=False), - ('list', service, 0, {dict})) or {} - return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta) + return { + **series_meta, + 'id': episode_id, + 'formats': self._extract_m3u8_formats(episode.get('stream_url'), episode_id, fatal=False), + 'container': 'm4a_dash', # force fixup, AAC-only HLS + 'was_live': True, + 'title': episode.get('program_title'), + 'description': episode.get('program_sub_title'), # fallback + 'timestamp': unified_timestamp(fallback_end_time), + 'release_timestamp': unified_timestamp(fallback_start_time), + **extended_metadata, + } - def 
_extract_episode_info(self, headline, programme_id, series_meta): + def _extract_news_info(self, headline, programme_id, series_meta): episode_id = f'{programme_id}_{headline["headline_id"]}' episode = traverse_obj(headline, ('file_list', 0, {dict})) - description = self._extract_extended_description(episode_id, episode) - if not description: - self.report_warning('Failed to get extended description, falling back to summary') - description = traverse_obj(episode, ('file_title_sub', {str})) return { **series_meta, @@ -687,9 +763,9 @@ class NhkRadiruIE(InfoExtractor): 'was_live': True, 'series': series_meta.get('title'), 'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'), - 'description': description, **traverse_obj(episode, { - 'title': 'file_title', + 'title': ('file_title', {str}), + 'description': ('file_title_sub', {str}), 'timestamp': ('open_time', {unified_timestamp}), 'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}), }), @@ -706,32 +782,58 @@ class NhkRadiruIE(InfoExtractor): site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline') programme_id = f'{site_id}_{corner_id}' - if site_id == 'F261': - json_url = 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json' - else: - json_url = f'https://www.nhk.or.jp/radioondemand/json/{site_id}/bangumi_{programme_id}.json' - - meta = self._download_json(json_url, programme_id)['main'] + if site_id == 'F261': # XXX: News programmes use old API (for now?) 
+ meta = self._download_json( + 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json', programme_id)['main'] + series_meta = traverse_obj(meta, { + 'title': ('program_name', {str}), + 'channel': ('media_name', {str}), + 'uploader': ('media_name', {str}), + 'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}), + }, get_all=False) + + if headline_id: + headline = traverse_obj( + meta, ('detail_list', lambda _, v: v['headline_id'] == headline_id, any)) + if not headline: + raise ExtractorError('Content not found; it has most likely expired', expected=True) + return self._extract_news_info(headline, programme_id, series_meta) + + def news_entries(): + for headline in traverse_obj(meta, ('detail_list', ..., {dict})): + yield self._extract_news_info(headline, programme_id, series_meta) + + return self.playlist_result( + news_entries(), programme_id, description=meta.get('site_detail'), **series_meta) + + meta = self._download_json( + 'https://www.nhk.or.jp/radio-api/app/v1/web/ondemand/series', programme_id, query={ + 'site_id': site_id, + 'corner_site_id': corner_id, + }) - series_meta = traverse_obj(meta, { - 'title': 'program_name', - 'channel': 'media_name', - 'uploader': 'media_name', - 'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}), - }, get_all=False) + fallback_station = join_nonempty('NHK', traverse_obj(meta, ('radio_broadcast', {str})), delim=' ') + series_meta = { + 'series': join_nonempty('title', 'corner_name', delim=' ', from_dict=meta), + 'series_id': programme_id, + 'thumbnail': traverse_obj(meta, ('thumbnail_url', {url_or_none})), + 'channel': fallback_station, + 'uploader': fallback_station, + } if headline_id: - return self._extract_episode_info( - traverse_obj(meta, ( - 'detail_list', lambda _, v: v['headline_id'] == headline_id), get_all=False), - programme_id, series_meta) + episode = traverse_obj(meta, ('episodes', lambda _, v: v['id'] == int(headline_id), any)) + if not episode: + raise ExtractorError('Content not 
found; it has most likely expired', expected=True) + return self._extract_episode_info(episode, programme_id, series_meta) def entries(): - for headline in traverse_obj(meta, ('detail_list', ..., {dict})): - yield self._extract_episode_info(headline, programme_id, series_meta) + for episode in traverse_obj(meta, ('episodes', ..., {dict})): + yield self._extract_episode_info(episode, programme_id, series_meta) return self.playlist_result( - entries(), programme_id, playlist_description=meta.get('site_detail'), **series_meta) + entries(), programme_id, title=series_meta.get('series'), + description=meta.get('series_description'), **series_meta) class NhkRadioNewsPageIE(InfoExtractor): @@ -747,7 +849,7 @@ class NhkRadioNewsPageIE(InfoExtractor): 'channel': 'NHKラジオ第1', 'uploader': 'NHKラジオ第1', 'title': 'NHKラジオニュース', - } + }, }] def _real_extract(self, url): @@ -789,7 +891,7 @@ class NhkRadiruLiveIE(InfoExtractor): 'ext': 'm4a', 'thumbnail': 'https://www.nhk.or.jp/common/img/media/fm-200x200.png', 'live_status': 'is_live', - } + }, }] _NOA_STATION_IDS = {'r1': 'n1', 'r2': 'n2', 'fm': 'n3'} @@ -803,8 +905,8 @@ class NhkRadiruLiveIE(InfoExtractor): data = config.find(f'.//data//area[.="{area}"]/..') if not data: - raise ExtractorError('Invalid area. Valid areas are: %s' % ', '.join( - [i.text for i in config.findall('.//data//area')]), expected=True) + raise ExtractorError('Invalid area. 
Valid areas are: {}'.format(', '.join( + [i.text for i in config.findall('.//data//area')])), expected=True) noa_info = self._download_json( f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text), @@ -812,7 +914,7 @@ class NhkRadiruLiveIE(InfoExtractor): present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present')) return { - 'title': ' '.join(traverse_obj(present_info, (('service', 'area',), 'name', {str}))), + 'title': ' '.join(traverse_obj(present_info, (('service', 'area'), 'name', {str}))), 'id': join_nonempty(station, area), 'thumbnails': traverse_obj(present_info, ('service', 'images', ..., { 'url': 'url', diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py index 64cddb4..ca47a81 100644 --- a/yt_dlp/extractor/nhl.py +++ b/yt_dlp/extractor/nhl.py @@ -1,8 +1,8 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_ext, int_or_none, + join_nonempty, parse_duration, parse_iso8601, ) @@ -12,8 +12,8 @@ class NHLBaseIE(InfoExtractor): def _real_extract(self, url): site, tmp_id = self._match_valid_url(url).groups() video_data = self._download_json( - 'https://%s/%s/%sid/v1/%s/details/web-v1.json' - % (self._CONTENT_DOMAIN, site[:3], 'item/' if site == 'mlb' else '', tmp_id), tmp_id) + 'https://{}/{}/{}id/v1/{}/details/web-v1.json'.format( + self._CONTENT_DOMAIN, site[:3], 'item/' if site == 'mlb' else '', tmp_id), tmp_id) if video_data.get('type') != 'video': video_data = video_data['media'] video = video_data.get('video') @@ -24,7 +24,7 @@ class NHLBaseIE(InfoExtractor): if videos: video_data = videos[0] - video_id = compat_str(video_data['id']) + video_id = str(video_data['id']) title = video_data['title'] formats = [] @@ -42,7 +42,7 @@ class NHLBaseIE(InfoExtractor): else: height = int_or_none(playback.get('height')) formats.append({ - 'format_id': playback.get('name', 'http' + ('-%dp' % height if height else '')), + 'format_id': 
playback.get('name') or join_nonempty('http', height and f'{height}p'), 'url': playback_url, 'width': int_or_none(playback.get('width')), 'height': height, diff --git a/yt_dlp/extractor/nick.py b/yt_dlp/extractor/nick.py index 165d8ce..653b10b 100644 --- a/yt_dlp/extractor/nick.py +++ b/yt_dlp/extractor/nick.py @@ -22,7 +22,7 @@ class NickIE(MTVServicesInfoExtractor): 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S1', 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.', - } + }, }, { 'md5': '839a04f49900a1fcbf517020d94e0737', @@ -32,7 +32,7 @@ class NickIE(MTVServicesInfoExtractor): 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S2', 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.', - } + }, }, { 'md5': 'f1145699f199770e2919ee8646955d46', @@ -42,7 +42,7 @@ class NickIE(MTVServicesInfoExtractor): 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S3', 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.', - } + }, }, { 'md5': 'd463116875aee2585ee58de3b12caebd', @@ -52,7 +52,7 @@ class NickIE(MTVServicesInfoExtractor): 'title': 'SpongeBob SquarePants: "A Place for Pets/Lockdown for Love" S4', 'description': 'A Place for Pets/Lockdown for Love: When customers bring pets into the Krusty Krab, Mr. Krabs realizes pets are more profitable than owners. 
Plankton ruins another date with Karen, so she puts the Chum Bucket on lockdown until he proves his affection.', - } + }, }, ], }, { @@ -63,7 +63,7 @@ class NickIE(MTVServicesInfoExtractor): 'description': 'md5:9d65a66df38e02254852794b2809d1cf', 'title': 'Blue\'s Imagination Station', }, - 'skip': 'Not accessible?' + 'skip': 'Not accessible?', }] def _get_feed_query(self, uri): @@ -74,10 +74,10 @@ class NickIE(MTVServicesInfoExtractor): def _real_extract(self, url): domain, video_type, display_id = self._match_valid_url(url).groups() - if video_type.startswith("episodes"): + if video_type.startswith('episodes'): return super()._real_extract(url) video_data = self._download_json( - 'http://%s/data/video.endLevel.json' % domain, + f'http://{domain}/data/video.endLevel.json', display_id, query={ 'urlKey': display_id, }) @@ -184,7 +184,7 @@ class NickDeIE(MTVServicesInfoExtractor): def _get_feed_url(self, uri, url=None): video_id = self._id_from_uri(uri) config = self._download_json( - 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge&ref=%s' % (uri, url), video_id) + f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}', video_id) return self._remove_template_parameter(config['feedWithQueryParams']) @@ -221,4 +221,4 @@ class NickRuIE(MTVServicesInfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) mgid = self._extract_mgid(webpage, url) - return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) + return self.url_result(f'http://media.mtvnservices.com/embed/{mgid}') diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index b04ce96..9d7b010 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -213,7 +213,7 @@ class NiconicoIE(InfoExtractor): urljoin('https://account.nicovideo.jp', post_url), None, note='Performing MFA', errnote='Unable to complete MFA', data=urlencode_postdata({ - 'otp': 
self._get_tfa_info('6 digits code') + 'otp': self._get_tfa_info('6 digits code'), }), headers={ 'Content-Type': 'application/x-www-form-urlencoded', }) @@ -264,7 +264,7 @@ class NiconicoIE(InfoExtractor): 'http_output_download_parameters': { 'use_ssl': yesno(session_api_data['urls'][0]['isSsl']), 'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']), - } + }, } elif dmc_protocol == 'hls': protocol = 'm3u8' @@ -277,14 +277,14 @@ class NiconicoIE(InfoExtractor): 'transfer_preset': '', 'use_ssl': yesno(session_api_data['urls'][0]['isSsl']), 'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']), - } + }, } if 'hls_encryption' in parsed_token and encryption: protocol_parameters['hls_parameters']['encryption'] = { parsed_token['hls_encryption']: { 'encrypted_key': encryption['encryptedKey'], 'key_uri': encryption['keyUri'], - } + }, } else: protocol = 'm3u8_native' @@ -295,7 +295,7 @@ class NiconicoIE(InfoExtractor): session_api_endpoint['url'], video_id, query={'_format': 'json'}, headers={'Content-Type': 'application/json'}, - note='Downloading JSON metadata for %s' % info_dict['format_id'], + note='Downloading JSON metadata for {}'.format(info_dict['format_id']), data=json.dumps({ 'session': { 'client_info': { @@ -305,7 +305,7 @@ class NiconicoIE(InfoExtractor): 'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]), 'content_key_timeout': session_api_data.get('contentKeyTimeout'), 'service_id': 'nicovideo', - 'service_user_id': session_api_data.get('serviceUserId') + 'service_user_id': session_api_data.get('serviceUserId'), }, 'content_id': session_api_data.get('contentId'), 'content_src_id_sets': [{ @@ -313,34 +313,34 @@ class NiconicoIE(InfoExtractor): 'src_id_to_mux': { 'audio_src_ids': [audio_src_id], 'video_src_ids': [video_src_id], - } - }] + }, + }], }], 'content_type': 'movie', 'content_uri': '', 'keep_method': { 'heartbeat': { - 'lifetime': 
session_api_data.get('heartbeatLifetime') - } + 'lifetime': session_api_data.get('heartbeatLifetime'), + }, }, 'priority': session_api_data['priority'], 'protocol': { 'name': 'http', 'parameters': { 'http_parameters': { - 'parameters': protocol_parameters - } - } + 'parameters': protocol_parameters, + }, + }, }, 'recipe_id': session_api_data.get('recipeId'), 'session_operation_auth': { 'session_operation_auth_by_signature': { 'signature': session_api_data.get('signature'), 'token': session_api_data.get('token'), - } + }, }, - 'timing_constraint': 'unlimited' - } + 'timing_constraint': 'unlimited', + }, }).encode()) info_dict['url'] = session_response['data']['session']['content_uri'] @@ -352,7 +352,7 @@ class NiconicoIE(InfoExtractor): 'data': json.dumps(session_response['data']), # interval, convert milliseconds to seconds, then halve to make a buffer. 'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000), - 'ping': ping + 'ping': ping, } return info_dict, heartbeat_info_dict @@ -368,7 +368,7 @@ class NiconicoIE(InfoExtractor): vid_qual_label = traverse_obj(video_quality, ('metadata', 'label')) return { - 'url': 'niconico_dmc:%s/%s/%s' % (video_id, video_quality['id'], audio_quality['id']), + 'url': 'niconico_dmc:{}/{}/{}'.format(video_id, video_quality['id'], audio_quality['id']), 'format_id': format_id, 'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '), 'ext': 'mp4', # Session API are used in HTML5, which always serves mp4 @@ -389,7 +389,7 @@ class NiconicoIE(InfoExtractor): 'http_headers': { 'Origin': 'https://www.nicovideo.jp', 'Referer': 'https://www.nicovideo.jp/watch/' + video_id, - } + }, } def _yield_dmc_formats(self, api_data, video_id): @@ -416,7 +416,7 @@ class NiconicoIE(InfoExtractor): dms_m3u8_url = self._download_json( f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls', video_id, data=json.dumps({ - 'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] 
for a in audios))) + 'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios))), }).encode(), query={'actionTrackId': track_id}, headers={ 'x-access-right-key': access_key, 'x-frontend-id': 6, @@ -464,7 +464,7 @@ class NiconicoIE(InfoExtractor): except ExtractorError as e: try: api_data = self._download_json( - 'https://www.nicovideo.jp/api/watch/v3/%s?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_%d' % (video_id, round(time.time() * 1000)), video_id, + f'https://www.nicovideo.jp/api/watch/v3/{video_id}?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_{round(time.time() * 1000)}', video_id, note='Downloading API JSON', errnote='Unable to fetch data')['data'] except ExtractorError: if not isinstance(e.cause, HTTPError): @@ -586,7 +586,7 @@ class NiconicoPlaylistBaseIE(InfoExtractor): _API_HEADERS = { 'X-Frontend-ID': '6', 'X-Frontend-Version': '0', - 'X-Niconico-Language': 'en-us' + 'X-Niconico-Language': 'en-us', } def _call_api(self, list_id, resource, query): @@ -601,7 +601,7 @@ class NiconicoPlaylistBaseIE(InfoExtractor): def _fetch_page(self, list_id, page): page += 1 - resp = self._call_api(list_id, 'page %d' % page, { + resp = self._call_api(list_id, f'page {page}', { 'page': page, 'pageSize': self._PAGE_SIZE, }) @@ -789,14 +789,14 @@ class NicovideoSearchURLIE(NicovideoSearchBaseIE): 'url': 'http://www.nicovideo.jp/search/sm9', 'info_dict': { 'id': 'sm9', - 'title': 'sm9' + 'title': 'sm9', }, 'playlist_mincount': 40, }, { 'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01', 'info_dict': { 'id': 'sm9', - 'title': 'sm9' + 'title': 'sm9', }, 'playlist_count': 31, }] @@ -814,7 +814,7 @@ class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor): 'url': 'nicosearchdateall:a', 'info_dict': { 'id': 'a', - 'title': 'a' + 'title': 'a', }, 'playlist_mincount': 1610, }] @@ -861,7 +861,7 @@ class NicovideoTagURLIE(NicovideoSearchBaseIE): 'url': 
'https://www.nicovideo.jp/tag/ドキュメンタリー淫夢', 'info_dict': { 'id': 'ドキュメンタリー淫夢', - 'title': 'ドキュメンタリー淫夢' + 'title': 'ドキュメンタリー淫夢', }, 'playlist_mincount': 400, }] @@ -880,12 +880,12 @@ class NiconicoUserIE(InfoExtractor): }, 'playlist_mincount': 101, } - _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s" + _API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s' _PAGE_SIZE = 100 _API_HEADERS = { 'X-Frontend-ID': '6', - 'X-Frontend-Version': '0' + 'X-Frontend-Version': '0', } def _entries(self, list_id): @@ -895,12 +895,12 @@ class NiconicoUserIE(InfoExtractor): json_parsed = self._download_json( self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id, headers=self._API_HEADERS, - note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else '')) + note='Downloading JSON metadata%s' % (f' page {page_num}' if page_num else '')) if not page_num: total_count = int_or_none(json_parsed['data'].get('totalCount')) - for entry in json_parsed["data"]["items"]: + for entry in json_parsed['data']['items']: count += 1 - yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id']) + yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id'])) page_num += 1 def _real_extract(self, url): @@ -917,7 +917,7 @@ class NiconicoLiveIE(InfoExtractor): 'url': 'https://live.nicovideo.jp/watch/lv339533123', 'info_dict': { 'id': 'lv339533123', - 'title': '激辛ペヤング食べます( ;ᯅ; )(歌枠オーディション参加中)', + 'title': '激辛ペヤング食べます\u202a( ;ᯅ; )\u202c(歌枠オーディション参加中)', 'view_count': 1526, 'comment_count': 1772, 'description': '初めましてもかって言います❕\nのんびり自由に適当に暮らしてます', @@ -973,14 +973,14 @@ class NiconicoLiveIE(InfoExtractor): 'quality': 'abr', 'protocol': 'hls+fmp4', 'latency': latency, - 'chasePlay': False + 'chasePlay': False, }, 'room': { 'protocol': 'webSocket', - 'commentable': True + 'commentable': True, }, 'reconnect': False, - } + }, })) while 
True: @@ -1004,7 +1004,7 @@ class NiconicoLiveIE(InfoExtractor): elif self.get_param('verbose', False): if len(recv) > 100: recv = recv[:100] + '...' - self.write_debug('Server said: %s' % recv) + self.write_debug(f'Server said: {recv}') title = traverse_obj(embedded_data, ('program', 'title')) or self._html_search_meta( ('og:title', 'twitter:title'), webpage, 'live title', fatal=False) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index 89af3f7..f39d000 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -18,9 +18,9 @@ from ..utils import ( class NiconicoChannelPlusBaseIE(InfoExtractor): _WEBPAGE_BASE_URL = 'https://nicochannel.jp' - def _call_api(self, path, item_id, *args, **kwargs): + def _call_api(self, path, item_id, **kwargs): return self._download_json( - f'https://nfc-api.nicochannel.jp/fc/{path}', video_id=item_id, *args, **kwargs) + f'https://nfc-api.nicochannel.jp/fc/{path}', video_id=item_id, **kwargs) def _find_fanclub_site_id(self, channel_name): fanclub_list_json = self._call_api( diff --git a/yt_dlp/extractor/ninaprotocol.py b/yt_dlp/extractor/ninaprotocol.py index ea57c5f..c8063fb 100644 --- a/yt_dlp/extractor/ninaprotocol.py +++ b/yt_dlp/extractor/ninaprotocol.py @@ -41,7 +41,7 @@ class NinaProtocolIE(InfoExtractor): 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP', 'upload_date': '20231201', 'album_artist': 'Post Present Medium ', - } + }, }, { 'info_dict': { 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_2', @@ -60,7 +60,7 @@ class NinaProtocolIE(InfoExtractor): 'timestamp': 1701417610, 'album': 'The Spatulas - March Chant', 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A', - } + }, }, { 'info_dict': { 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_3', @@ -79,7 +79,7 @@ class NinaProtocolIE(InfoExtractor): 'tags': ['punk', 'postpresentmedium', 'cambridge'], 'uploader': 'ppmrecs', 'channel': 'ppm', - } + }, }, { 
'info_dict': { 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_4', @@ -98,7 +98,7 @@ class NinaProtocolIE(InfoExtractor): 'channel': 'ppm', 'uploader_id': '2bGjgdKUddJoj2shYGqfNcUfoSoABP21RJoiwGMZDq3A', 'uploader': 'ppmrecs', - } + }, }, { 'info_dict': { 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_5', @@ -117,7 +117,7 @@ class NinaProtocolIE(InfoExtractor): 'uploader': 'ppmrecs', 'channel_id': '4ceG4zsb7VVxBTGPtZMqDZWGHo3VUg2xRvzC2b17ymWP', 'upload_date': '20231201', - } + }, }, { 'info_dict': { 'id': '3SvsMM3y4oTPZ5DXFJnLkCAqkxz34hjzFxqms1vu9XBJ_6', diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py index 579370f..93e2862 100644 --- a/yt_dlp/extractor/ninecninemedia.py +++ b/yt_dlp/extractor/ninecninemedia.py @@ -23,7 +23,7 @@ class NineCNineMediaIE(InfoExtractor): title = content['Name'] content_package = content['ContentPackages'][0] package_id = content_package['Id'] - content_package_url = api_base_url + 'contentpackages/%s/' % package_id + content_package_url = api_base_url + f'contentpackages/{package_id}/' content_package = self._download_json( content_package_url, content_id, query={ '$include': '[HasClosedCaptions]', @@ -91,7 +91,7 @@ class NineCNineMediaIE(InfoExtractor): }, { 'url': manifest_base_url + 'srt', 'ext': 'srt', - }] + }], } return info @@ -118,13 +118,13 @@ class CPTwentyFourIE(InfoExtractor): 'thumbnail': 'http://images2.9c9media.com/image_asset/2014_11_5_2eb609a0-475b-0132-fbd6-34b52f6f1279_jpg_2000x1125.jpg', 'upload_date': '20211122', }, - 'params': {'skip_download': True, 'format': 'bv'} + 'params': {'skip_download': True, 'format': 'bv'}, }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - id, destination = self._search_regex( + video_id, destination = self._search_regex( r'getAuthStates\("(?P<id>[^"]+)",\s?"(?P<destination>[^"]+)"\);', webpage, 'video id and destination', group=('id', 'destination')) - return 
self.url_result(f'9c9media:{destination}:{id}', ie=NineCNineMediaIE.ie_key(), video_id=id) + return self.url_result(f'9c9media:{destination}:{video_id}', NineCNineMediaIE, video_id) diff --git a/yt_dlp/extractor/ninegag.py b/yt_dlp/extractor/ninegag.py index 865ad99..2979f3a 100644 --- a/yt_dlp/extractor/ninegag.py +++ b/yt_dlp/extractor/ninegag.py @@ -29,7 +29,7 @@ class NineGagIE(InfoExtractor): 'like_count': int, 'dislike_count': int, 'comment_count': int, - } + }, }, { # HTML escaped title 'url': 'https://9gag.com/gag/av5nvyb', @@ -53,14 +53,14 @@ class NineGagIE(InfoExtractor): 'uploader': 'Peter Klaus', 'uploader_id': 'peterklaus12', 'uploader_url': 'https://9gag.com/u/peterklaus12', - } + }, }] def _real_extract(self, url): post_id = self._match_id(url) post = self._download_json( 'https://9gag.com/v1/post', post_id, query={ - 'id': post_id + 'id': post_id, })['data']['post'] if post.get('type') != 'Animated': diff --git a/yt_dlp/extractor/ninenews.py b/yt_dlp/extractor/ninenews.py index 0b4f47b..08d7fe4 100644 --- a/yt_dlp/extractor/ninenews.py +++ b/yt_dlp/extractor/ninenews.py @@ -21,7 +21,7 @@ class NineNewsIE(InfoExtractor): 'upload_date': '20231222', 'uploader_id': '664969388001', 'tags': ['networkclip', 'aunews_aunationalninenews', 'christmas presents', 'toys', 'fair trading', 'au_news'], - } + }, }, { 'url': 'https://www.9news.com.au/world/tape-reveals-donald-trump-pressured-michigan-officials-not-to-certify-2020-vote-a-new-report-says/0b8b880e-7d3c-41b9-b2bd-55bc7e492259', 'md5': 'a885c44d20898c3e70e9a53e8188cea1', @@ -36,7 +36,7 @@ class NineNewsIE(InfoExtractor): 'upload_date': '20231220', 'uploader_id': '664969388001', 'tags': ['networkclip', 'aunews_aunationalninenews', 'ineligible', 'presidential candidate', 'donald trump', 'au_news'], - } + }, }, { 'url': 'https://www.9news.com.au/national/outrage-as-parents-banned-from-giving-gifts-to-kindergarten-teachers/e19b49d4-a1a4-4533-9089-6e10e2d9386a', 'info_dict': { diff --git 
a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py index b7170b0..f17531e 100644 --- a/yt_dlp/extractor/ninenow.py +++ b/yt_dlp/extractor/ninenow.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, float_or_none, @@ -55,7 +54,7 @@ class NineNowIE(InfoExtractor): 'expected_warnings': ['Ignoring subtitle tracks'], 'params': { 'skip_download': True, - } + }, }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s' @@ -72,15 +71,15 @@ class NineNowIE(InfoExtractor): for kind in ('episode', 'clip'): current_key = page_data.get(kind, {}).get( - 'current%sKey' % kind.capitalize()) + f'current{kind.capitalize()}Key') if not current_key: continue - cache = page_data.get(kind, {}).get('%sCache' % kind, {}) + cache = page_data.get(kind, {}).get(f'{kind}Cache', {}) if not cache: continue common_data = { - 'episode': (cache.get(current_key) or list(cache.values())[0])[kind], - 'season': (cache.get(current_key) or list(cache.values())[0]).get('season', None) + 'episode': (cache.get(current_key) or next(iter(cache.values())))[kind], + 'season': (cache.get(current_key) or next(iter(cache.values()))).get('season', None), } break else: @@ -89,14 +88,14 @@ class NineNowIE(InfoExtractor): if not self.get_param('allow_unplayable_formats') and try_get(common_data, lambda x: x['episode']['video']['drm'], bool): self.report_drm(display_id) brightcove_id = try_get( - common_data, lambda x: x['episode']['video']['brightcoveId'], compat_str) or 'ref:%s' % common_data['episode']['video']['referenceId'] + common_data, lambda x: x['episode']['video']['brightcoveId'], str) or 'ref:{}'.format(common_data['episode']['video']['referenceId']) video_id = str_or_none(try_get(common_data, lambda x: x['episode']['video']['id'])) or brightcove_id - title = try_get(common_data, lambda x: x['episode']['name'], compat_str) + title = try_get(common_data, lambda x: 
x['episode']['name'], str) season_number = try_get(common_data, lambda x: x['season']['seasonNumber'], int) episode_number = try_get(common_data, lambda x: x['episode']['episodeNumber'], int) - timestamp = unified_timestamp(try_get(common_data, lambda x: x['episode']['airDate'], compat_str)) - release_date = unified_strdate(try_get(common_data, lambda x: x['episode']['availability'], compat_str)) + timestamp = unified_timestamp(try_get(common_data, lambda x: x['episode']['airDate'], str)) + release_date = unified_strdate(try_get(common_data, lambda x: x['episode']['availability'], str)) thumbnails_data = try_get(common_data, lambda x: x['episode']['image']['sizes'], dict) or {} thumbnails = [{ 'id': thumbnail_id, @@ -111,7 +110,7 @@ class NineNowIE(InfoExtractor): {'geo_countries': self._GEO_COUNTRIES}), 'id': video_id, 'title': title, - 'description': try_get(common_data, lambda x: x['episode']['description'], compat_str), + 'description': try_get(common_data, lambda x: x['episode']['description'], str), 'duration': float_or_none(try_get(common_data, lambda x: x['episode']['video']['duration'], float), 1000), 'thumbnails': thumbnails, 'ie_key': 'BrightcoveNew', diff --git a/yt_dlp/extractor/nintendo.py b/yt_dlp/extractor/nintendo.py index 853a169..d8eb853 100644 --- a/yt_dlp/extractor/nintendo.py +++ b/yt_dlp/extractor/nintendo.py @@ -91,7 +91,7 @@ class NintendoIE(InfoExtractor): 'extensions': json.dumps({ 'persistedQuery': { 'version': 1, - 'sha256Hash': '969b16fe9f08b686fa37bc44d1fd913b6188e65794bb5e341c54fa683a8004cb' + 'sha256Hash': '969b16fe9f08b686fa37bc44d1fd913b6188e65794bb5e341c54fa683a8004cb', }, }, separators=(',', ':')), }) diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py index 249e7cd..7609b40 100644 --- a/yt_dlp/extractor/nitter.py +++ b/yt_dlp/extractor/nitter.py @@ -1,8 +1,8 @@ import random import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( 
determine_ext, parse_count, @@ -265,7 +265,7 @@ class NitterIE(InfoExtractor): 'like_count': int, 'repost_count': int, 'comment_count': int, - } + }, }, { # no OpenGraph title 'url': f'https://{current_instance}/LocalBateman/status/1678455464038735895#m', 'info_dict': { @@ -286,12 +286,12 @@ class NitterIE(InfoExtractor): }, 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'], 'params': {'skip_download': 'm3u8'}, - } + }, ] def _real_extract(self, url): video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id') - parsed_url = compat_urlparse.urlparse(url) + parsed_url = urllib.parse.urlparse(url) base_url = f'{parsed_url.scheme}://{parsed_url.netloc}' self._set_cookie(parsed_url.netloc, 'hlsPlayback', 'on') @@ -301,7 +301,7 @@ class NitterIE(InfoExtractor): if main_tweet_start > 0: webpage = full_webpage[main_tweet_start:] - video_url = '%s%s' % (base_url, self._html_search_regex( + video_url = '{}{}'.format(base_url, self._html_search_regex( r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')) ext = determine_ext(video_url) @@ -310,7 +310,7 @@ class NitterIE(InfoExtractor): else: formats = [{ 'url': video_url, - 'ext': ext + 'ext': ext, }] title = description = self._og_search_description(full_webpage, default=None) or self._html_search_regex( @@ -334,12 +334,12 @@ class NitterIE(InfoExtractor): thumbnail = ( self._html_search_meta('og:image', full_webpage, 'thumbnail url') - or remove_end('%s%s' % (base_url, self._html_search_regex( + or remove_end('{}{}'.format(base_url, self._html_search_regex( r'<video[^>]+poster="([^"]+)"', webpage, 'thumbnail url', fatal=False)), '%3Asmall')) thumbnails = [ - {'id': id, 'url': f'{thumbnail}%3A{id}'} - for id in ('thumb', 'small', 'large', 'medium', 'orig') + {'id': id_, 'url': f'{thumbnail}%3A{id_}'} + for id_ in ('thumb', 'small', 'large', 'medium', 'orig') ] date = self._html_search_regex( diff --git a/yt_dlp/extractor/nobelprize.py 
b/yt_dlp/extractor/nobelprize.py index 513529b..536ca27 100644 --- a/yt_dlp/extractor/nobelprize.py +++ b/yt_dlp/extractor/nobelprize.py @@ -20,7 +20,7 @@ class NobelPrizeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Announcement of the 2016 Nobel Prize in Physics', 'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/noice.py b/yt_dlp/extractor/noice.py index e6e3433..f413055 100644 --- a/yt_dlp/extractor/noice.py +++ b/yt_dlp/extractor/noice.py @@ -35,7 +35,7 @@ class NoicePodcastIE(InfoExtractor): 'comment_count': int, 'dislike_count': int, 'channel_follower_count': int, - } + }, }, { 'url': 'https://open.noice.id/content/222134e4-99f2-456f-b8a2-b8be404bf063', 'info_dict': { @@ -60,7 +60,7 @@ class NoicePodcastIE(InfoExtractor): 'comment_count': int, 'channel': 'Dear Jerome', 'channel_follower_count': int, - } + }, }] def _get_formats_and_subtitles(self, media_url, video_id): @@ -112,5 +112,5 @@ class NoicePodcastIE(InfoExtractor): 'dislike_count': 'dislikes', 'comment_count': 'comments', 'channel_follower_count': 'followers', - })) + })), } diff --git a/yt_dlp/extractor/nonktube.py b/yt_dlp/extractor/nonktube.py index f191be3..192e6bb 100644 --- a/yt_dlp/extractor/nonktube.py +++ b/yt_dlp/extractor/nonktube.py @@ -14,7 +14,7 @@ class NonkTubeIE(NuevoBaseIE): }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'https://www.nonktube.com/embed/118636', 'only_matching': True, diff --git a/yt_dlp/extractor/noodlemagazine.py b/yt_dlp/extractor/noodlemagazine.py index 1c1a763..6414f46 100644 --- a/yt_dlp/extractor/noodlemagazine.py +++ b/yt_dlp/extractor/noodlemagazine.py @@ -25,8 +25,8 @@ class NoodleMagazineIE(InfoExtractor): 'description': 'Aria alexander manojob', 'tags': ['aria', 'alexander', 'manojob'], 'upload_date': '20190218', - 'age_limit': 18 - } + 'age_limit': 18, + }, } def _real_extract(self, url): @@ -76,5 +76,5 @@ class NoodleMagazineIE(InfoExtractor): 'view_count': 
view_count, 'like_count': like_count, 'upload_date': upload_date, - 'age_limit': 18 + 'age_limit': 18, } diff --git a/yt_dlp/extractor/noovo.py b/yt_dlp/extractor/noovo.py index acbb74c..772d4ed 100644 --- a/yt_dlp/extractor/noovo.py +++ b/yt_dlp/extractor/noovo.py @@ -1,6 +1,5 @@ from .brightcove import BrightcoveNewIE from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, js_to_json, @@ -65,7 +64,7 @@ class NoovoIE(InfoExtractor): title = try_get( data, lambda x: x['video']['nom'], - compat_str) or self._html_search_meta( + str) or self._html_search_meta( 'dcterms.Title', webpage, 'title', fatal=True) description = self._html_search_meta( @@ -77,11 +76,11 @@ class NoovoIE(InfoExtractor): webpage, 'series', default=None) season_el = try_get(data, lambda x: x['emission']['saison'], dict) or {} - season = try_get(season_el, lambda x: x['nom'], compat_str) + season = try_get(season_el, lambda x: x['nom'], str) season_number = int_or_none(try_get(season_el, lambda x: x['numero'])) episode_el = try_get(season_el, lambda x: x['episode'], dict) or {} - episode = try_get(episode_el, lambda x: x['nom'], compat_str) + episode = try_get(episode_el, lambda x: x['nom'], str) episode_number = int_or_none(try_get(episode_el, lambda x: x['numero'])) return { diff --git a/yt_dlp/extractor/nosnl.py b/yt_dlp/extractor/nosnl.py index cea54c9..13f908c 100644 --- a/yt_dlp/extractor/nosnl.py +++ b/yt_dlp/extractor/nosnl.py @@ -15,7 +15,7 @@ class NOSNLArticleIE(InfoExtractor): 'title': '\'We hebben een huis vol met scheuren\'', 'duration': 95.0, 'thumbnail': 'https://cdn.nos.nl/image/2022/08/12/887149/3840x2160a.jpg', - } + }, }, { # more than 1 video 'url': 'https://nos.nl/artikel/2440409-vannacht-sliepen-weer-enkele-honderden-asielzoekers-in-ter-apel-buiten', @@ -64,7 +64,7 @@ class NOSNLArticleIE(InfoExtractor): 'categories': ['Buitenland'], }, 'playlist_mincount': 1, - } + }, ] def _entries(self, nextjs_json, display_id): @@ -82,7 +82,7 
@@ class NOSNLArticleIE(InfoExtractor): 'thumbnails': [{ 'url': traverse_obj(image, ('url', ...), get_all=False), 'width': image.get('width'), - 'height': image.get('height') + 'height': image.get('height'), } for image in traverse_obj(item, ('imagesByRatio', ...))[0]], } diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py index 72884aa..e7b69e3 100644 --- a/yt_dlp/extractor/nova.py +++ b/yt_dlp/extractor/nova.py @@ -58,7 +58,7 @@ class NovaEmbedIE(InfoExtractor): duration = None formats = [] - def process_format_list(format_list, format_id=""): + def process_format_list(format_list, format_id=''): nonlocal formats, has_drm if not isinstance(format_list, list): format_list = [format_list] @@ -144,7 +144,7 @@ class NovaIE(InfoExtractor): 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53', 'thumbnail': r're:^https?://.*\.(?:jpg)', 'duration': 151, - } + }, }, { 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', 'info_dict': { @@ -216,11 +216,11 @@ class NovaIE(InfoExtractor): if embed_id: return { '_type': 'url_transparent', - 'url': 'https://media.cms.nova.cz/embed/%s' % embed_id, + 'url': f'https://media.cms.nova.cz/embed/{embed_id}', 'ie_key': NovaEmbedIE.ie_key(), 'id': embed_id, 'description': description, - 'upload_date': upload_date + 'upload_date': upload_date, } video_id = self._search_regex( diff --git a/yt_dlp/extractor/novaplay.py b/yt_dlp/extractor/novaplay.py index adab33f..77d7ce1 100644 --- a/yt_dlp/extractor/novaplay.py +++ b/yt_dlp/extractor/novaplay.py @@ -34,7 +34,7 @@ class NovaPlayIE(InfoExtractor): 'thumbnail': 'https://nbg-img.fite.tv/img/606609_460x260.jpg', 'description': '29 сек', }, - } + }, ] _access_token = None @@ -50,7 +50,7 @@ class NovaPlayIE(InfoExtractor): video_id, headers={ 'x-flipps-user-agent': 'Flipps/75/9.7', 'x-flipps-version': '2022-05-17', - 'Authorization': f'Bearer {self._access_token}' + 'Authorization': f'Bearer {self._access_token}', 
})[0]['links']['play']['href'] formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls') diff --git a/yt_dlp/extractor/nowness.py b/yt_dlp/extractor/nowness.py index a3c29f6..c001a82 100644 --- a/yt_dlp/extractor/nowness.py +++ b/yt_dlp/extractor/nowness.py @@ -3,7 +3,6 @@ from .brightcove import ( BrightcoveNewIE, ) from .common import InfoExtractor -from ..compat import compat_str from ..networking import Request from ..utils import ExtractorError @@ -17,7 +16,7 @@ class NownessBaseIE(InfoExtractor): source = media['source'] if source == 'brightcove': player_code = self._download_webpage( - 'http://www.nowness.com/iframe?id=%s' % video_id, video_id, + f'http://www.nowness.com/iframe?id={video_id}', video_id, note='Downloading player JavaScript', errnote='Unable to download player JavaScript') bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code) @@ -28,7 +27,7 @@ class NownessBaseIE(InfoExtractor): return self.url_result(bc_url, BrightcoveNewIE.ie_key()) raise ExtractorError('Could not find player definition') elif source == 'vimeo': - return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') + return self.url_result(f'http://vimeo.com/{video_id}', 'Vimeo') elif source == 'youtube': return self.url_result(video_id, 'Youtube') elif source == 'cinematique': @@ -139,4 +138,4 @@ class NownessSeriesIE(NownessBaseIE): series_title = translations[0].get('title') or translations[0]['seoTitle'] series_description = translations[0].get('seoDescription') return self.playlist_result( - entries, compat_str(series['id']), series_title, series_description) + entries, str(series['id']), series_title, series_description) diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py index 19cb972..8476a85 100644 --- a/yt_dlp/extractor/noz.py +++ b/yt_dlp/extractor/noz.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote from ..utils import ( find_xpath_attr, 
int_or_none, @@ -35,9 +36,9 @@ class NozIE(InfoExtractor): config_url_encoded = self._search_regex( r'so\.addVariable\("config_url","[^,]*,(.*?)"', - edge_content, 'config URL' + edge_content, 'config URL', ) - config_url = compat_urllib_parse_unquote(config_url_encoded) + config_url = urllib.parse.unquote(config_url_encoded) doc = self._download_xml(config_url, 'video configuration') title = xpath_text(doc, './/title') @@ -53,7 +54,7 @@ class NozIE(InfoExtractor): formats.append({ 'url': http_url, 'format_name': xpath_text(qnode, './name'), - 'format_id': '%s-%s' % ('http', xpath_text(qnode, './id')), + 'format_id': '{}-{}'.format('http', xpath_text(qnode, './id')), 'height': int_or_none(xpath_text(qnode, './height')), 'width': int_or_none(xpath_text(qnode, './width')), 'tbr': int_or_none(xpath_text(qnode, './bitrate'), scale=1000), diff --git a/yt_dlp/extractor/npo.py b/yt_dlp/extractor/npo.py index 4d5ff50..178fd98 100644 --- a/yt_dlp/extractor/npo.py +++ b/yt_dlp/extractor/npo.py @@ -200,7 +200,7 @@ class NPOIE(InfoExtractor): def suitable(cls, url): return (False if any(ie.suitable(url) for ie in (NPOLiveIE, NPORadioIE, NPORadioFragmentIE)) - else super(NPOIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): video_id = self._match_id(url) @@ -228,7 +228,7 @@ class NPOIE(InfoExtractor): 'hasAdConsent': 0, }), headers={ 'x-xsrf-token': try_call(lambda: urllib.parse.unquote( - self._get_cookies('https://www.npostart.nl')['XSRF-TOKEN'].value)) + self._get_cookies('https://www.npostart.nl')['XSRF-TOKEN'].value)), }) player_token = player['token'] @@ -238,8 +238,8 @@ class NPOIE(InfoExtractor): formats = [] for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'): streams = self._download_json( - 'https://start-player.npo.nl/video/%s/streams' % video_id, - video_id, 'Downloading %s profile JSON' % profile, fatal=False, + f'https://start-player.npo.nl/video/{video_id}/streams', + video_id, f'Downloading {profile} profile 
JSON', fatal=False, query={ 'profile': profile, 'quality': 'npoplus', @@ -339,7 +339,7 @@ class NPOLiveIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'http://www.npo.nl/live', 'only_matching': True, @@ -358,7 +358,7 @@ class NPOLiveIE(InfoExtractor): return { '_type': 'url_transparent', - 'url': 'npo:%s' % live_id, + 'url': f'npo:{live_id}', 'ie_key': NPOIE.ie_key(), 'id': live_id, 'display_id': display_id, @@ -379,16 +379,16 @@ class NPORadioIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, } @classmethod def suitable(cls, url): - return False if NPORadioFragmentIE.suitable(url) else super(NPORadioIE, cls).suitable(url) + return False if NPORadioFragmentIE.suitable(url) else super().suitable(url) @staticmethod def _html_get_attribute_regex(attribute): - return r'{0}\s*=\s*\'([^\']+)\''.format(attribute) + return rf'{attribute}\s*=\s*\'([^\']+)\'' def _real_extract(self, url): video_id = self._match_id(url) @@ -434,7 +434,7 @@ class NPORadioFragmentIE(InfoExtractor): webpage = self._download_webpage(url, audio_id) title = self._html_search_regex( - r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % audio_id, + rf'href="/radio/[^/]+/fragment/{audio_id}" title="([^"]+)"', webpage, 'title') audio_url = self._search_regex( @@ -456,8 +456,8 @@ class NPODataMidEmbedIE(InfoExtractor): # XXX: Conventionally, base classes sho return { '_type': 'url_transparent', 'ie_key': 'NPO', - 'url': 'npo:%s' % video_id, - 'display_id': display_id + 'url': f'npo:{video_id}', + 'display_id': display_id, } @@ -472,12 +472,12 @@ class SchoolTVIE(NPODataMidEmbedIE): 'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam', 'title': 'Ademhaling: De hele dag haal je adem. 
Maar wat gebeurt er dan eigenlijk in je lichaam?', 'ext': 'mp4', - 'description': 'md5:abfa0ff690adb73fd0297fd033aaa631' + 'description': 'md5:abfa0ff690adb73fd0297fd033aaa631', }, 'params': { # Skip because of m3u8 download - 'skip_download': True - } + 'skip_download': True, + }, } @@ -496,8 +496,8 @@ class HetKlokhuisIE(NPODataMidEmbedIE): 'upload_date': '20170223', }, 'params': { - 'skip_download': True - } + 'skip_download': True, + }, } @@ -508,7 +508,7 @@ class NPOPlaylistBaseIE(NPOIE): # XXX: Do not subclass from concrete IE webpage = self._download_webpage(url, playlist_id) entries = [ - self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) + self.url_result(f'npo:{video_id}' if not video_id.startswith('http') else video_id) for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage)) ] @@ -574,9 +574,9 @@ class VPROIE(NPOPlaylistBaseIE): }, 'params': { # Skip because of m3u8 download - 'skip_download': True + 'skip_download': True, }, - } + }, ] diff --git a/yt_dlp/extractor/npr.py b/yt_dlp/extractor/npr.py index 4b6855c..06103ff 100644 --- a/yt_dlp/extractor/npr.py +++ b/yt_dlp/extractor/npr.py @@ -8,14 +8,14 @@ class NprIE(InfoExtractor): 'url': 'https://www.npr.org/sections/allsongs/2015/10/21/449974205/new-music-from-beach-house-chairlift-cmj-discoveries-and-more', 'info_dict': { 'id': '449974205', - 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More' + 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More', }, 'playlist_count': 7, }, { 'url': 'https://www.npr.org/sections/deceptivecadence/2015/10/09/446928052/music-from-the-shadows-ancient-armenian-hymns-and-piano-jazz', 'info_dict': { 'id': '446928052', - 'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'" + 'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'", }, 'playlist': [{ 'md5': '12fa60cb2d3ed932f53609d4aeceabf1', diff --git a/yt_dlp/extractor/nrk.py 
b/yt_dlp/extractor/nrk.py index 384865a..658ae5f 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -3,7 +3,6 @@ import random import re from .common import InfoExtractor -from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -53,12 +52,12 @@ class NRKBaseIE(InfoExtractor): msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=self._GEO_COUNTRIES) message = data.get('endUserMessage') or MESSAGES.get(message_type, message_type) - raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {message}', expected=True) def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None): return self._download_json( urljoin('https://psapi.nrk.no/', path), - video_id, note or 'Downloading %s JSON' % item, + video_id, note or f'Downloading {item} JSON', fatal=fatal, query=query) @@ -85,7 +84,7 @@ class NRKIE(NRKBaseIE): 'title': 'Dompap og andre fugler i Piip-Show', 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', 'duration': 262, - } + }, }, { # audio 'url': 'http://www.nrk.no/video/PS*154915', @@ -96,7 +95,7 @@ class NRKIE(NRKBaseIE): 'title': 'Slik høres internett ut når du er blind', 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', 'duration': 20, - } + }, }, { 'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9', 'only_matching': True, @@ -156,7 +155,7 @@ class NRKIE(NRKBaseIE): # known values for preferredCdn: akamai, iponly, minicdn and telenor manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'}) - video_id = try_get(manifest, lambda x: x['id'], compat_str) or video_id + video_id = try_get(manifest, lambda x: x['id'], str) or video_id if manifest.get('playability') == 'nonPlayable': self._raise_error(manifest['nonPlayable']) @@ -216,13 +215,13 @@ class NRKIE(NRKBaseIE): sub_key = str_or_none(sub.get('language')) or 'nb' sub_type = str_or_none(sub.get('type')) if sub_type: - sub_key += 
'-%s' % sub_type + sub_key += f'-{sub_type}' subtitles.setdefault(sub_key, []).append({ 'url': sub_url, }) legal_age = try_get( - data, lambda x: x['legalAge']['body']['rating']['code'], compat_str) + data, lambda x: x['legalAge']['body']['rating']['code'], str) # https://en.wikipedia.org/wiki/Norwegian_Media_Authority age_limit = None if legal_age: @@ -243,13 +242,13 @@ class NRKIE(NRKBaseIE): 'age_limit': age_limit, 'formats': formats, 'subtitles': subtitles, - 'timestamp': parse_iso8601(try_get(manifest, lambda x: x['availability']['onDemand']['from'], str)) + 'timestamp': parse_iso8601(try_get(manifest, lambda x: x['availability']['onDemand']['from'], str)), } if is_series: series = season_id = season_number = episode = episode_number = None programs = self._call_api( - 'programs/%s' % video_id, video_id, 'programs', fatal=False) + f'programs/{video_id}', video_id, 'programs', fatal=False) if programs and isinstance(programs, dict): series = str_or_none(programs.get('seriesTitle')) season_id = str_or_none(programs.get('seasonId')) @@ -259,7 +258,7 @@ class NRKIE(NRKBaseIE): if not series: series = title if alt_title: - title += ' - %s' % alt_title + title += f' - {alt_title}' if not season_number: season_number = int_or_none(self._search_regex( r'Sesong\s+(\d+)', description or '', 'season number', @@ -289,7 +288,7 @@ class NRKIE(NRKBaseIE): class NRKTVIE(InfoExtractor): IE_DESC = 'NRK TV and NRK Radio' _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})' - _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE + _VALID_URL = rf'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*{_EPISODE_RE}' _TESTS = [{ 'url': 'https://tv.nrk.no/program/MDDP12000117', 'md5': 'c4a5960f1b00b40d47db65c1064e0ab1', @@ -306,7 +305,7 @@ class NRKTVIE(InfoExtractor): }], 'nb-ttv': [{ 'ext': 'vtt', - }] + }], }, }, }, { @@ -411,7 +410,7 @@ class NRKTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) return self.url_result( - 'nrk:%s' % 
video_id, ie=NRKIE.ie_key(), video_id=video_id) + f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id) class NRKTVEpisodeIE(InfoExtractor): @@ -461,14 +460,14 @@ class NRKTVEpisodeIE(InfoExtractor): info = self._search_json_ld(webpage, display_id, default={}) nrk_id = info.get('@id') or self._html_search_meta( 'nrk:program-id', webpage, default=None) or self._search_regex( - r'data-program-id=["\'](%s)' % NRKTVIE._EPISODE_RE, webpage, + rf'data-program-id=["\']({NRKTVIE._EPISODE_RE})', webpage, 'nrk id') assert re.match(NRKTVIE._EPISODE_RE, nrk_id) info.update({ '_type': 'url', 'id': nrk_id, - 'url': 'nrk:%s' % nrk_id, + 'url': f'nrk:{nrk_id}', 'ie_key': NRKIE.ie_key(), 'season_number': int(season_number), 'episode_number': int(episode_number), @@ -483,13 +482,13 @@ class NRKTVSerieBaseIE(NRKBaseIE): entries = [] for episode in entry_list: nrk_id = episode.get('prfId') or episode.get('episodeId') - if not nrk_id or not isinstance(nrk_id, compat_str): + if not nrk_id or not isinstance(nrk_id, str): continue entries.append(self.url_result( - 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)) + f'nrk:{nrk_id}', ie=NRKIE.ie_key(), video_id=nrk_id)) return entries - _ASSETS_KEYS = ('episodes', 'instalments',) + _ASSETS_KEYS = ('episodes', 'instalments') def _extract_assets_key(self, embedded): for asset_key in self._ASSETS_KEYS: @@ -514,19 +513,18 @@ class NRKTVSerieBaseIE(NRKBaseIE): (lambda x: x[assets_key]['_embedded'][assets_key], lambda x: x[assets_key]), list) - for e in self._extract_entries(entries): - yield e + yield from self._extract_entries(entries) # Find next URL next_url_path = try_get( data, (lambda x: x['_links']['next']['href'], lambda x: x['_embedded'][assets_key]['_links']['next']['href']), - compat_str) + str) if not next_url_path: break data = self._call_api( next_url_path, display_id, - note='Downloading %s JSON page %d' % (assets_key, page_num), + note=f'Downloading {assets_key} JSON page {page_num}', fatal=False) if not data: break @@ 
-593,7 +591,7 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE): @classmethod def suitable(cls, url): return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url) or NRKRadioPodkastIE.suitable(url) - else super(NRKTVSeasonIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): mobj = self._match_valid_url(url) @@ -601,14 +599,13 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE): serie_kind = mobj.group('serie_kind') serie = mobj.group('serie') season_id = mobj.group('id') or mobj.group('id_2') - display_id = '%s/%s' % (serie, season_id) + display_id = f'{serie}/{season_id}' data = self._call_api( - '%s/catalog/%s/%s/seasons/%s' - % (domain, self._catalog_name(serie_kind), serie, season_id), + f'{domain}/catalog/{self._catalog_name(serie_kind)}/{serie}/seasons/{season_id}', display_id, 'season', query={'pageSize': 50}) - title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id + title = try_get(data, lambda x: x['titles']['title'], str) or display_id return self.playlist_result( self._entries(data, display_id), display_id, title) @@ -691,7 +688,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): return ( False if any(ie.suitable(url) for ie in (NRKTVIE, NRKTVEpisodeIE, NRKRadioPodkastIE, NRKTVSeasonIE)) - else super(NRKTVSeriesIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): site, serie_kind, series_id = self._match_valid_url(url).groups() @@ -700,8 +697,7 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): size_prefix = 'p' if is_radio else 'embeddedInstalmentsP' series = self._call_api( - '%s/catalog/%s/%s' - % (domain, self._catalog_name(serie_kind), series_id), + f'{domain}/catalog/{self._catalog_name(serie_kind)}/{series_id}', series_id, 'serie', query={size_prefix + 'ageSize': 50}) titles = try_get(series, [ lambda x: x['titles'], @@ -719,8 +715,8 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE): season_url = urljoin(url, season.get('href')) if not season_url: season_name = season.get('name') - if season_name 
and isinstance(season_name, compat_str): - season_url = 'https://%s.nrk.no/serie/%s/sesong/%s' % (domain, series_id, season_name) + if season_name and isinstance(season_name, str): + season_url = f'https://{domain}.nrk.no/serie/{series_id}/sesong/{season_name}' if season_url: entries.append(self.url_result( season_url, ie=NRKTVSeasonIE.ie_key(), @@ -777,7 +773,7 @@ class NRKRadioPodkastIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) return self.url_result( - 'nrk:%s' % video_id, ie=NRKIE.ie_key(), video_id=video_id) + f'nrk:{video_id}', ie=NRKIE.ie_key(), video_id=video_id) class NRKPlaylistBaseIE(InfoExtractor): @@ -790,7 +786,7 @@ class NRKPlaylistBaseIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) entries = [ - self.url_result('nrk:%s' % video_id, NRKIE.ie_key()) + self.url_result(f'nrk:{video_id}', NRKIE.ie_key()) for video_id in re.findall(self._ITEM_RE, webpage) ] @@ -831,7 +827,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE): class NRKTVEpisodesIE(NRKPlaylistBaseIE): _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)' - _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE + _ITEM_RE = rf'data-episode=["\']{NRKTVIE._EPISODE_RE}' _TESTS = [{ 'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031', 'info_dict': { @@ -869,7 +865,7 @@ class NRKSkoleIE(InfoExtractor): video_id = self._match_id(url) nrk_id = self._download_json( - 'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/%s' % video_id, + f'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/{video_id}', video_id)['psId'] - return self.url_result('nrk:%s' % nrk_id) + return self.url_result(f'nrk:{nrk_id}') diff --git a/yt_dlp/extractor/ntvru.py b/yt_dlp/extractor/ntvru.py index fe39657..1ab1be0 100644 --- a/yt_dlp/extractor/ntvru.py +++ b/yt_dlp/extractor/ntvru.py @@ -102,7 +102,7 @@ class NTVRuIE(InfoExtractor): self._VIDEO_ID_REGEXES, webpage, 'video id') player = self._download_xml( - 'http://www.ntv.ru/vi%s/' % 
video_id, + f'http://www.ntv.ru/vi{video_id}/', video_id, 'Downloading video XML') title = strip_or_none(unescapeHTML(xpath_text(player, './data/title', 'title', fatal=True))) @@ -111,7 +111,7 @@ class NTVRuIE(InfoExtractor): formats = [] for format_id in ['', 'hi', 'webm']: - file_ = xpath_text(video, './%sfile' % format_id) + file_ = xpath_text(video, f'./{format_id}file') if not file_: continue if file_.startswith('//'): @@ -120,7 +120,7 @@ class NTVRuIE(InfoExtractor): file_ = 'http://media.ntv.ru/vod/' + file_ formats.append({ 'url': file_, - 'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)), + 'filesize': int_or_none(xpath_text(video, f'./{format_id}size')), }) hls_manifest = xpath_text(video, './playback/hls') if hls_manifest: diff --git a/yt_dlp/extractor/nubilesporn.py b/yt_dlp/extractor/nubilesporn.py index 1d630f5..c2079d8 100644 --- a/yt_dlp/extractor/nubilesporn.py +++ b/yt_dlp/extractor/nubilesporn.py @@ -51,8 +51,8 @@ class NubilesPornIE(InfoExtractor): 'season': 'Season 3', 'season_number': 3, 'episode': 'Episode 1', - 'episode_number': 1 - } + 'episode_number': 1, + }, }] def _perform_login(self, username, password): @@ -95,5 +95,5 @@ class NubilesPornIE(InfoExtractor): 'series': channel_name, 'series_id': channel_id, 'season_number': int_or_none(url_match.group('season')), - 'episode_number': int_or_none(url_match.group('episode')) + 'episode_number': int_or_none(url_match.group('episode')), } diff --git a/yt_dlp/extractor/nuevo.py b/yt_dlp/extractor/nuevo.py index 5670445..945fd0c 100644 --- a/yt_dlp/extractor/nuevo.py +++ b/yt_dlp/extractor/nuevo.py @@ -28,5 +28,5 @@ class NuevoBaseIE(InfoExtractor): 'title': title, 'thumbnail': thumbnail, 'duration': duration, - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/nuum.py b/yt_dlp/extractor/nuum.py index 3db663d..697fc6b 100644 --- a/yt_dlp/extractor/nuum.py +++ b/yt_dlp/extractor/nuum.py @@ -43,15 +43,17 @@ class NuumBaseIE(InfoExtractor): is_live = 
media.get('media_status') == 'RUNNING' formats, subtitles = None, None + headers = {'Referer': 'https://nuum.ru/'} if extract_formats: formats, subtitles = self._extract_m3u8_formats_and_subtitles( - media_url, video_id, 'mp4', live=is_live) + media_url, video_id, 'mp4', live=is_live, headers=headers) return filter_dict({ 'id': video_id, 'is_live': is_live, 'formats': formats, 'subtitles': subtitles, + 'http_headers': headers, **traverse_obj(container, { 'title': ('media_container_name', {str}), 'description': ('media_container_description', {str}), @@ -78,7 +80,7 @@ class NuumMediaIE(NuumBaseIE): 'only_matching': True, }, { 'url': 'https://nuum.ru/videos/1567547-toxi-hurtz', - 'md5': 'f1d9118a30403e32b702a204eb03aca3', + 'md5': 'ce28837a5bbffe6952d7bfd3d39811b0', 'info_dict': { 'id': '1567547', 'ext': 'mp4', diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py index 0ef0ec7..99a426b 100644 --- a/yt_dlp/extractor/nuvid.py +++ b/yt_dlp/extractor/nuvid.py @@ -22,7 +22,7 @@ class NuvidIE(InfoExtractor): 'duration': 321.0, 'age_limit': 18, 'thumbnail': r're:https?://.+\.jpg', - } + }, }, { 'url': 'https://m.nuvid.com/video/6523263', 'md5': 'ebd22ce8e47e1d9a4d0756a15c67da52', @@ -34,7 +34,7 @@ class NuvidIE(InfoExtractor): 'age_limit': 18, 'thumbnail': r're:https?://.+\.jpg', 'thumbnails': list, - } + }, }, { 'url': 'http://m.nuvid.com/video/6415801/', 'md5': '638d5ececb138d5753593f751ae3f697', @@ -45,7 +45,7 @@ class NuvidIE(InfoExtractor): 'duration': 1882, 'age_limit': 18, 'thumbnail': r're:https?://.+\.jpg', - } + }, }] def _real_extract(self, url): @@ -64,7 +64,7 @@ class NuvidIE(InfoExtractor): }) webpage = self._download_webpage( - 'http://m.nuvid.com/video/%s' % (video_id, ), + f'http://m.nuvid.com/video/{video_id}', video_id, 'Downloading video page', fatal=False) or '' title = strip_or_none(video_data.get('title') or self._html_search_regex( diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py index 3019202..5ec3cdd 100644 --- 
a/yt_dlp/extractor/nytimes.py +++ b/yt_dlp/extractor/nytimes.py @@ -251,7 +251,7 @@ class NYTimesArticleIE(NYTimesBaseIE): 'thumbnails': self._extract_thumbnails(traverse_obj( block, ('promotionalMedia', 'crops', ..., 'renditions', ...))), 'formats': formats, - 'subtitles': subtitles + 'subtitles': subtitles, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py index 0a12aea..7c09e9e 100644 --- a/yt_dlp/extractor/nzherald.py +++ b/yt_dlp/extractor/nzherald.py @@ -2,7 +2,6 @@ import json from .brightcove import BrightcoveNewIE from .common import InfoExtractor -from ..compat import compat_str from ..utils import ExtractorError, traverse_obj @@ -24,7 +23,7 @@ class NZHeraldIE(InfoExtractor): 'tags': [], 'thumbnail': r're:https?://.*\.jpg$', 'description': 'md5:2f17713fcbfcfbe38bb9e7dfccbb0f2e', - } + }, }, { # Webpage has brightcove embed player url 'url': 'https://www.nzherald.co.nz/travel/pencarrow-coastal-trail/HDVTPJEPP46HJ2UEMK4EGD2DFI/', @@ -39,7 +38,7 @@ class NZHeraldIE(InfoExtractor): 'thumbnail': r're:https?://.*\.jpg$', 'tags': ['travel', 'video'], 'duration': 43.627, - } + }, }, { # two video embeds of the same video 'url': 'https://www.nzherald.co.nz/nz/truck-driver-captured-cutting-off-motorist-on-state-highway-1-in-canterbury/FIHNJB7PLLPHWQPK4S7ZBDUC4I/', @@ -50,7 +49,7 @@ class NZHeraldIE(InfoExtractor): 'timestamp': 1619730509, 'upload_date': '20210429', 'uploader_id': '1308227299001', - 'description': 'md5:4cae7dfb7613ac4c73b9e73a75c6b5d7' + 'description': 'md5:4cae7dfb7613ac4c73b9e73a75c6b5d7', }, 'skip': 'video removed', }, { @@ -67,17 +66,17 @@ class NZHeraldIE(InfoExtractor): 'tags': ['video', 'nz herald focus', 'politics', 'politics videos'], 'thumbnail': r're:https?://.*\.jpg$', 'duration': 99.584, - } + }, }, { 'url': 'https://www.nzherald.co.nz/kahu/kaupapa-companies-my-taiao-supporting-maori-in-study-and-business/PQBO2J25WCG77VGRX7W7BVYEAI/', - 'only_matching': True + 'only_matching': 
True, }, { 'url': 'https://nzherald.co.nz/the-country/video/focus-nzs-first-mass-covid-19-vaccination-event/N5I7IL3BRFLZSD33TLDLYJDGK4/', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.nzherald.co.nz/the-vision-is-clear/news/tvic-damian-roper-planting-trees-an-addiction/AN2AAEPNRK5VLISDWQAJZB6ATQ', - 'only_matching': True - } + 'only_matching': True, + }, ] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1308227299001/S1BXZn8t_default/index.html?videoId=%s' @@ -86,7 +85,7 @@ class NZHeraldIE(InfoExtractor): """The initial webpage may include the brightcove player embed url""" bc_url = BrightcoveNewIE._extract_url(self, webpage) return bc_url or self._search_regex( - r'(?:embedUrl)\"\s*:\s*\"(?P<embed_url>%s)' % BrightcoveNewIE._VALID_URL, + rf'(?:embedUrl)\"\s*:\s*\"(?P<embed_url>{BrightcoveNewIE._VALID_URL})', webpage, 'embed url', default=None, group='embed_url') def _real_extract(self, url): @@ -108,7 +107,7 @@ class NZHeraldIE(InfoExtractor): bc_video_id = traverse_obj( video_metadata or fusion_metadata, # fusion metadata is the video metadata for video-only pages 'brightcoveId', ('content_elements', ..., 'referent', 'id'), - get_all=False, expected_type=compat_str) + get_all=False, expected_type=str) if not bc_video_id: if isinstance(video_metadata, dict) and len(video_metadata) == 0: diff --git a/yt_dlp/extractor/nzonscreen.py b/yt_dlp/extractor/nzonscreen.py index bf2dbca..5fc516d 100644 --- a/yt_dlp/extractor/nzonscreen.py +++ b/yt_dlp/extractor/nzonscreen.py @@ -89,5 +89,5 @@ class NZOnScreenIE(InfoExtractor): 'http_headers': { 'Referer': 'https://www.nzonscreen.com/', 'Origin': 'https://www.nzonscreen.com/', - } + }, } diff --git a/yt_dlp/extractor/odkmedia.py b/yt_dlp/extractor/odkmedia.py index 8321b07..766cb94 100644 --- a/yt_dlp/extractor/odkmedia.py +++ b/yt_dlp/extractor/odkmedia.py @@ -24,7 +24,7 @@ class OnDemandChinaEpisodeIE(InfoExtractor): 'thumbnail': 
'https://d2y2efdi5wgkcl.cloudfront.net/fit-in/256x256/media-io/2020/9/11/image.d9816e81.jpg', 'description': '疫情严峻,党政军民学、东西南北中协同应考', 'tags': ['Social Humanities', 'Documentary', 'Medical', 'Social'], - } + }, }] _QUERY = ''' @@ -101,5 +101,5 @@ class OnDemandChinaEpisodeIE(InfoExtractor): or self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage)), 'formats': formats, 'subtitles': subtitles, - 'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', ')) + 'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', ')), } diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index 1be45d8..d27d1c3 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -1,12 +1,7 @@ import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_etree_fromstring, - compat_parse_qs, - compat_urllib_parse_unquote, - compat_urllib_parse_urlparse, -) +from ..compat import compat_etree_fromstring from ..networking import HEADRequest from ..utils import ( ExtractorError, @@ -257,8 +252,8 @@ class OdnoklassnikiIE(InfoExtractor): raise e def _extract_desktop(self, url): - start_time = int_or_none(compat_parse_qs( - compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0]) + start_time = int_or_none(urllib.parse.parse_qs( + urllib.parse.urlparse(url).query).get('fromTime', [None])[0]) url, smuggled = unsmuggle_url(url, {}) video_id, is_embed = self._match_valid_url(url).group('id', 'embed') @@ -281,7 +276,7 @@ class OdnoklassnikiIE(InfoExtractor): player = self._parse_json( unescapeHTML(self._search_regex( - r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id, + rf'data-options=(?P<quote>["\'])(?P<player>{{.+?{video_id}.+?}})(?P=quote)', webpage, 'player', group='player')), video_id) @@ -300,7 +295,7 @@ class OdnoklassnikiIE(InfoExtractor): if st_location: data['st.location'] = st_location metadata = 
self._download_json( - compat_urllib_parse_unquote(flashvars['metadataUrl']), + urllib.parse.unquote(flashvars['metadataUrl']), video_id, 'Downloading metadata JSON', data=urlencode_postdata(data)) @@ -434,7 +429,7 @@ class OdnoklassnikiIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://m.ok.ru/video/%s' % video_id, video_id, + f'http://m.ok.ru/video/{video_id}', video_id, note='Downloading mobile webpage') error = self._search_regex( @@ -460,5 +455,5 @@ class OdnoklassnikiIE(InfoExtractor): 'format_id': 'mobile', 'url': redirect_url, 'ext': 'mp4', - }] + }], } diff --git a/yt_dlp/extractor/oftv.py b/yt_dlp/extractor/oftv.py index 4cac518..415694c 100644 --- a/yt_dlp/extractor/oftv.py +++ b/yt_dlp/extractor/oftv.py @@ -20,8 +20,8 @@ class OfTVIE(InfoExtractor): 'timestamp': 1652391300, 'upload_date': '20220512', 'view_count': 0, - 'creator': 'This is Fire' - } + 'creator': 'This is Fire', + }, }] def _real_extract(self, url): @@ -39,8 +39,8 @@ class OfTVPlaylistIE(InfoExtractor): 'url': 'https://of.tv/creators/this-is-fire/', 'playlist_count': 8, 'info_dict': { - 'id': 'this-is-fire' - } + 'id': 'this-is-fire', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/oktoberfesttv.py b/yt_dlp/extractor/oktoberfesttv.py index e0ac856..b4bcdc7 100644 --- a/yt_dlp/extractor/oktoberfesttv.py +++ b/yt_dlp/extractor/oktoberfesttv.py @@ -15,7 +15,7 @@ class OktoberfestTVIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py index 5507d2f..becf052 100644 --- a/yt_dlp/extractor/olympics.py +++ b/yt_dlp/extractor/olympics.py @@ -24,9 +24,9 @@ class OlympicsReplayIE(InfoExtractor): }] def _real_extract(self, url): - id = self._match_id(url) + video_id = self._match_id(url) - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, video_id) title = self._html_search_meta(('title', 
'og:title', 'twitter:title'), webpage) uuid = self._html_search_meta('episode_uid', webpage) m3u8_url = self._html_search_meta('video_url', webpage) @@ -46,7 +46,7 @@ class OlympicsReplayIE(InfoExtractor): thumbnails.append({ 'url': thumbnail, 'width': width, - 'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)) + 'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)), }) m3u8_url = self._download_json( f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url') @@ -58,5 +58,5 @@ class OlympicsReplayIE(InfoExtractor): 'thumbnails': thumbnails, 'formats': formats, 'subtitles': subtitles, - **json_ld + **json_ld, } diff --git a/yt_dlp/extractor/on24.py b/yt_dlp/extractor/on24.py index 9a4abc9..05218e9 100644 --- a/yt_dlp/extractor/on24.py +++ b/yt_dlp/extractor/on24.py @@ -27,7 +27,7 @@ class On24IE(InfoExtractor): 'upload_date': '20200219', 'timestamp': 1582149600.0, 'view_count': int, - } + }, }, { 'url': 'https://event.on24.com/wcc/r/2639291/82829018E813065A122363877975752E?mode=login&email=johnsmith@gmail.com', 'only_matching': True, @@ -47,7 +47,7 @@ class On24IE(InfoExtractor): 'eventId': event_id, 'displayProfile': 'player', 'key': event_key, - 'contentType': 'A' + 'contentType': 'A', }) event_id = str(try_get(event_data, lambda x: x['presentationLogInfo']['eventid'])) or event_id language = event_data.get('localelanguagecode') @@ -74,7 +74,7 @@ class On24IE(InfoExtractor): 'language': language, 'ext': 'wav', 'vcodec': 'none', - 'acodec': 'wav' + 'acodec': 'wav', }) return { diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py index e1b7268..ee432e8 100644 --- a/yt_dlp/extractor/onefootball.py +++ b/yt_dlp/extractor/onefootball.py @@ -36,7 +36,7 @@ class OneFootballIE(InfoExtractor): 'tags': ['Football', 'Soccer', 'OneFootball'], '_old_archive_ids': ['onefootball 34041020'], }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] def 
_real_extract(self, url): diff --git a/yt_dlp/extractor/onenewsnz.py b/yt_dlp/extractor/onenewsnz.py index 351b397..c849da0 100644 --- a/yt_dlp/extractor/onenewsnz.py +++ b/yt_dlp/extractor/onenewsnz.py @@ -26,8 +26,8 @@ class OneNewsNZIE(InfoExtractor): 'duration': 38.272, 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'Greenpeace accused the Government of "greenwashing" instead of taking climate action.', - } - }] + }, + }], }, { # YouTube video 'url': 'https://www.1news.co.nz/2022/09/30/now-is-the-time-to-care-about-womens-rugby/', @@ -59,8 +59,8 @@ class OneNewsNZIE(InfoExtractor): 'availability': 'public', 'playable_in_embed': True, 'live_status': 'not_live', - } - }] + }, + }], }, { # 2 Brightcove videos 'url': 'https://www.1news.co.nz/2022/09/29/raw-videos-capture-hurricane-ians-fury-as-it-slams-florida/', @@ -89,7 +89,7 @@ class OneNewsNZIE(InfoExtractor): brightcove_config = traverse_obj(item, ('embed', 'config')) brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % ( traverse_obj(brightcove_config, 'brightcoveAccount') or '963482464001', - traverse_obj(brightcove_config, 'brightcoveVideoId') + traverse_obj(brightcove_config, 'brightcoveVideoId'), ) entries.append(self.url_result(brightcove_url, BrightcoveNewIE)) elif item_type == 'youtube': diff --git a/yt_dlp/extractor/oneplace.py b/yt_dlp/extractor/oneplace.py index 86337ad..12e6ef6 100644 --- a/yt_dlp/extractor/oneplace.py +++ b/yt_dlp/extractor/oneplace.py @@ -10,7 +10,7 @@ class OnePlacePodcastIE(InfoExtractor): 'ext': 'mp3', 'title': 'Living in the Last Days Part 2 | A Daily Walk with John Randall', 'description': 'md5:fbb8f1cf21447ac54ecaa2887fc20c6e', - } + }, }, { 'url': 'https://www.oneplace.com/ministries/ankerberg-show/listen/ep-3-relying-on-the-constant-companionship-of-the-holy-spirit-part-2-922513.html', 'info_dict': { @@ -18,7 +18,7 @@ class OnePlacePodcastIE(InfoExtractor): 'ext': 'mp3', 'description': 'md5:8b810b4349aa40a5d033b4536fe428e1', 'title': 
'md5:ce10f7d8d5ddcf485ed8905ef109659d', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/onet.py b/yt_dlp/extractor/onet.py index da10f37..05e4d69 100644 --- a/yt_dlp/extractor/onet.py +++ b/yt_dlp/extractor/onet.py @@ -39,7 +39,7 @@ class OnetBaseIE(InfoExtractor): error = response.get('error') if error: raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error['message']), expected=True) + '{} said: {}'.format(self.IE_NAME, error['message']), expected=True) video = response['result'].get('0') @@ -182,7 +182,7 @@ class OnetChannelIE(OnetBaseIE): return self._extract_from_id(video_id, webpage) matches = re.findall( - r'<a[^>]+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE, + rf'<a[^>]+href=[\'"]({self._URL_BASE_RE}[a-z]+/[0-9a-z-]+/[0-9a-z]+)', webpage) entries = [ self.url_result(video_link, OnetIE.ie_key()) @@ -256,4 +256,4 @@ class OnetPlIE(InfoExtractor): mvp_id = self._search_mvp_id(webpage) return self.url_result( - 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id) + f'onetmvp:{mvp_id}', OnetMVPIE.ie_key(), video_id=mvp_id) diff --git a/yt_dlp/extractor/onionstudios.py b/yt_dlp/extractor/onionstudios.py index 5fa49e1..7e30b2d 100644 --- a/yt_dlp/extractor/onionstudios.py +++ b/yt_dlp/extractor/onionstudios.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import js_to_json @@ -34,7 +33,7 @@ class OnionStudiosIE(InfoExtractor): webpage = self._download_webpage( 'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js', video_id) - mcp_id = compat_str(self._parse_json(self._search_regex( + mcp_id = str(self._parse_json(self._search_regex( r'window\.mcpMapping\s*=\s*({.+?});', webpage, 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id']) return self.url_result( diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py index 12bf557..a4b0a19 100644 --- a/yt_dlp/extractor/opencast.py +++ b/yt_dlp/extractor/opencast.py @@ -126,7 +126,7 
@@ class OpencastIE(OpencastBaseIE): 'series': 'Kryptographie - WiSe 15/16', 'creator': 'Alexander May', }, - } + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index 56b8330..2d56252 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -4,8 +4,8 @@ import json import os import subprocess import tempfile +import urllib.parse -from ..compat import compat_urlparse from ..utils import ( ExtractorError, Popen, @@ -121,7 +121,7 @@ class PhantomJSwrapper: if is_outdated_version(version, required_version): self.extractor._downloader.report_warning( 'Your copy of PhantomJS is outdated, update it to version ' - '%s or newer if you encounter any errors.' % required_version) + f'{required_version} or newer if you encounter any errors.') for name in self._TMP_FILE_NAMES: tmp = tempfile.NamedTemporaryFile(delete=False) @@ -146,9 +146,9 @@ class PhantomJSwrapper: if 'path' not in cookie: cookie['path'] = '/' if 'domain' not in cookie: - cookie['domain'] = compat_urlparse.urlparse(url).netloc + cookie['domain'] = urllib.parse.urlparse(url).netloc with open(self._TMP_FILES['cookies'].name, 'wb') as f: - f.write(json.dumps(cookies).encode('utf-8')) + f.write(json.dumps(cookies).encode()) def _load_cookies(self): with open(self._TMP_FILES['cookies'].name, 'rb') as f: @@ -201,7 +201,7 @@ class PhantomJSwrapper: if not html: html = self.extractor._download_webpage(url, video_id, note=note, headers=headers) with open(self._TMP_FILES['html'].name, 'wb') as f: - f.write(html.encode('utf-8')) + f.write(html.encode()) self._save_cookies(url) diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py index c9a96ae..b4f1c7d 100644 --- a/yt_dlp/extractor/openrec.py +++ b/yt_dlp/extractor/openrec.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, get_first, @@ -120,9 +119,9 @@ class OpenRecCaptureIE(OpenRecBaseIE): 'title': 
capture_data.get('title'), 'thumbnail': capture_data.get('thumbnailUrl'), 'formats': formats, - 'timestamp': unified_timestamp(traverse_obj(movie_store, 'createdAt', expected_type=compat_str)), - 'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str), - 'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str), + 'timestamp': unified_timestamp(traverse_obj(movie_store, 'createdAt', expected_type=str)), + 'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=str), + 'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=str), 'upload_date': unified_strdate(capture_data.get('createdAt')), 'http_headers': self._M3U8_HEADERS, } diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py index 0e7a848..c6ba4b0 100644 --- a/yt_dlp/extractor/ora.py +++ b/yt_dlp/extractor/ora.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( get_element_by_attribute, qualities, @@ -19,7 +19,7 @@ class OraTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'Vine & YouTube Stars Zach King & King Bach On Their Viral Videos!', 'description': 'md5:ebbc5b1424dd5dba7be7538148287ac1', - } + }, }, { 'url': 'http://www.unsafespeech.com/video/2016/5/10/student-self-censorship-and-the-thought-police-on-university-campuses-0_6622bnkppw4d', 'only_matching': True, @@ -38,14 +38,14 @@ class OraTVIE(InfoExtractor): m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) # similar to GameSpotIE - m3u8_path = compat_urlparse.urlparse(m3u8_url).path + m3u8_path = urllib.parse.urlparse(m3u8_url).path QUALITIES_RE = r'((,[a-z]+\d+)+,?)' available_qualities = self._search_regex( QUALITIES_RE, m3u8_path, 'qualities').strip(',').split(',') http_path = m3u8_path[1:].split('/', 1)[1] http_template = re.sub(QUALITIES_RE, r'%s', http_path) http_template = http_template.replace('.csmil/master.m3u8', '') - http_template = 
compat_urlparse.urljoin( + http_template = urllib.parse.urljoin( 'http://videocdn-pmd.ora.tv/', http_template) preference = qualities( ['mobile400', 'basic400', 'basic600', 'sd900', 'sd1200', 'sd1500', 'hd720', 'hd1080']) diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 3c837be..9c37a54 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -12,7 +12,9 @@ from ..utils import ( mimetype2ext, orderedSet, parse_age_limit, + parse_iso8601, remove_end, + str_or_none, strip_jsonp, try_call, unified_strdate, @@ -63,8 +65,8 @@ class ORFRadioIE(InfoExtractor): 'duration': 18000, 'timestamp': 1659322789, 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a', - } - }] + }, + }], }, { 'url': 'https://ooe.orf.at/player/20220801/OGMO', 'info_dict': { @@ -82,8 +84,8 @@ class ORFRadioIE(InfoExtractor): 'duration': 18000, 'timestamp': 1659322789, 'description': 'md5:a3f6083399ef92b8cbe2d421b180835a', - } - }] + }, + }], }, { 'url': 'http://fm4.orf.at/player/20170107/4CC', 'only_matching': True, @@ -125,7 +127,7 @@ class ORFRadioIE(InfoExtractor): 'timestamp': 1483858796, 'upload_date': '20170108', }, - 'skip': 'Shows from ORF radios are only available for 7 days.' 
+ 'skip': 'Shows from ORF radios are only available for 7 days.', }] def _entries(self, data, station): @@ -173,7 +175,7 @@ class ORFPodcastIE(InfoExtractor): 'duration': 3396.0, 'series': 'Frühstück bei mir', }, - 'skip': 'ORF podcasts are only available for a limited time' + 'skip': 'ORF podcasts are only available for a limited time', }] def _real_extract(self, url): @@ -219,13 +221,13 @@ class ORFIPTVIE(InfoExtractor): story_id = self._match_id(url) webpage = self._download_webpage( - 'http://iptv.orf.at/stories/%s' % story_id, story_id) + f'http://iptv.orf.at/stories/{story_id}', story_id) video_id = self._search_regex( r'data-video(?:id)?="(\d+)"', webpage, 'video id') data = self._download_json( - 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, + f'http://bits.orf.at/filehandler/static-api/json/current/data.json?file={video_id}', video_id)[0] duration = float_or_none(data['duration'], 1000) @@ -324,7 +326,7 @@ class ORFFM4StoryIE(InfoExtractor): all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage)) for idx, video_id in enumerate(all_ids): data = self._download_json( - 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, + f'http://bits.orf.at/filehandler/static-api/json/current/data.json?file={video_id}', video_id)[0] duration = float_or_none(data['duration'], 1000) @@ -390,7 +392,7 @@ class ORFFM4StoryIE(InfoExtractor): class ORFONIE(InfoExtractor): IE_NAME = 'orf:on' - _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)' + _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)(?:/(?P<segment>\d+))?' 
_TESTS = [{ 'url': 'https://on.orf.at/video/14210000/school-of-champions-48', 'info_dict': { @@ -401,10 +403,14 @@ class ORFONIE(InfoExtractor): 'title': 'School of Champions (4/8)', 'description': 'md5:d09ad279fc2e8502611e7648484b6afd', 'media_type': 'episode', - 'timestamp': 1706472362, - 'upload_date': '20240128', + 'timestamp': 1706558922, + 'upload_date': '20240129', + 'release_timestamp': 1706472362, + 'release_date': '20240128', + 'modified_timestamp': 1712756663, + 'modified_date': '20240410', '_old_archive_ids': ['orftvthek 14210000'], - } + }, }, { 'url': 'https://on.orf.at/video/3220355', 'md5': 'f94d98e667cf9a3851317efb4e136662', @@ -418,18 +424,87 @@ class ORFONIE(InfoExtractor): 'media_type': 'episode', 'timestamp': 52916400, 'upload_date': '19710905', + 'release_timestamp': 52916400, + 'release_date': '19710905', + 'modified_timestamp': 1498536049, + 'modified_date': '20170627', '_old_archive_ids': ['orftvthek 3220355'], - } + }, + }, { + # Video with multiple segments selecting the second segment + 'url': 'https://on.orf.at/video/14226549/15639808/jugendbande-einbrueche-aus-langeweile', + 'md5': '90f4ebff86b4580837b8a361d0232a9e', + 'info_dict': { + 'id': '15639808', + 'ext': 'mp4', + 'duration': 97.707, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0175/43/thumb_17442704_segments_highlight_teaser.jpg', + 'title': 'Jugendbande: Einbrüche aus Langeweile', + 'description': 'md5:193df0bf0d91cf16830c211078097120', + 'media_type': 'segment', + 'timestamp': 1715792400, + 'upload_date': '20240515', + 'modified_timestamp': 1715794394, + 'modified_date': '20240515', + '_old_archive_ids': ['orftvthek 15639808'], + }, + 'params': {'noplaylist': True}, + }, { + # Video with multiple segments and no combined version + 'url': 'https://on.orf.at/video/14227864/formel-1-grosser-preis-von-monaco-2024', + 'info_dict': { + '_type': 'multi_video', + 'id': '14227864', + 'duration': 18410.52, + 'thumbnail': 
'https://api-tvthek.orf.at/assets/segments/0176/04/thumb_17503881_segments_highlight_teaser.jpg', + 'title': 'Formel 1: Großer Preis von Monaco 2024', + 'description': 'md5:aeeb010710ccf70ce28ccb4482243d4f', + 'media_type': 'episode', + 'timestamp': 1716721200, + 'upload_date': '20240526', + 'release_timestamp': 1716721802, + 'release_date': '20240526', + 'modified_timestamp': 1716967501, + 'modified_date': '20240529', + }, + 'playlist_count': 42, + }, { + # Video with multiple segments, but with combined version + 'url': 'https://on.orf.at/video/14228172', + 'info_dict': { + 'id': '14228172', + 'ext': 'mp4', + 'duration': 3294.878, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0176/17/thumb_17516455_segments_highlight_teaser.jpg', + 'title': 'Willkommen Österreich mit Stermann & Grissemann', + 'description': 'md5:5de034d033a9c27f989343be3bbd4839', + 'media_type': 'episode', + 'timestamp': 1716926584, + 'upload_date': '20240528', + 'release_timestamp': 1716919202, + 'release_date': '20240528', + 'modified_timestamp': 1716968045, + 'modified_date': '20240529', + '_old_archive_ids': ['orftvthek 14228172'], + }, }] - def _extract_video(self, video_id): - encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode() - api_json = self._download_json( - f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id) - - if traverse_obj(api_json, 'is_drm_protected'): - self.report_drm(video_id) - + @staticmethod + def _parse_metadata(api_json): + return traverse_obj(api_json, { + 'id': ('id', {int}, {str_or_none}), + 'age_limit': ('age_classification', {parse_age_limit}), + 'duration': ('exact_duration', {functools.partial(float_or_none, scale=1000)}), + 'title': (('title', 'headline'), {str}), + 'description': (('description', 'teaser_text'), {str}), + 'media_type': ('video_type', {str}), + 'thumbnail': ('_embedded', 'image', 'public_urls', 'highlight_teaser', 'url', {url_or_none}), + 'timestamp': (('date', 
'episode_date'), {parse_iso8601}), + 'release_timestamp': ('release_date', {parse_iso8601}), + 'modified_timestamp': ('updated_at', {parse_iso8601}), + }, get_all=False) + + def _extract_video_info(self, video_id, api_json): formats, subtitles = [], {} for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)): for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})): @@ -454,24 +529,31 @@ class ORFONIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, '_old_archive_ids': [make_archive_id('ORFTVthek', video_id)], - **traverse_obj(api_json, { - 'age_limit': ('age_classification', {parse_age_limit}), - 'duration': ('duration_second', {float_or_none}), - 'title': (('title', 'headline'), {str}), - 'description': (('description', 'teaser_text'), {str}), - 'media_type': ('video_type', {str}), - }, get_all=False), + **self._parse_metadata(api_json), } def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + video_id, segment_id = self._match_valid_url(url).group('id', 'segment') - return { - 'id': video_id, - 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), - 'description': self._html_search_meta( - ['description', 'og:description', 'twitter:description'], webpage, default=None), - **self._search_json_ld(webpage, video_id, fatal=False), - **self._extract_video(video_id), - } + encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode() + api_json = self._download_json( + f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id) + + if traverse_obj(api_json, 'is_drm_protected'): + self.report_drm(video_id) + + segments = traverse_obj(api_json, ('_embedded', 'segments', lambda _, v: v['id'])) + selected_segment = traverse_obj(segments, (lambda _, v: str(v['id']) == segment_id, any)) + + # selected_segment will be falsy if input URL did not include a valid 
segment_id + if selected_segment and not self._yes_playlist(video_id, segment_id, playlist_label='episode', video_label='segment'): + return self._extract_video_info(segment_id, selected_segment) + + # Even some segmented videos have an unsegmented version available in API response root + if (self._configuration_arg('prefer_segments_playlist') + or not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none}))): + return self.playlist_result( + (self._extract_video_info(str(segment['id']), segment) for segment in segments), + video_id, **self._parse_metadata(api_json), multi_video=True) + + return self._extract_video_info(video_id, api_json) diff --git a/yt_dlp/extractor/outsidetv.py b/yt_dlp/extractor/outsidetv.py index b1fcbd6..b9191c9 100644 --- a/yt_dlp/extractor/outsidetv.py +++ b/yt_dlp/extractor/outsidetv.py @@ -13,7 +13,7 @@ class OutsideTVIE(InfoExtractor): 'description': 'md5:41a12e94f3db3ca253b04bb1e8d8f4cd', 'upload_date': '20181225', 'timestamp': 1545742800, - } + }, }, { 'url': 'http://www.outsidetv.com/home/play/ZjQYboH6/1/10/Hdg0jukV/4', 'only_matching': True, diff --git a/yt_dlp/extractor/packtpub.py b/yt_dlp/extractor/packtpub.py index 3e969c8..38ffd34 100644 --- a/yt_dlp/extractor/packtpub.py +++ b/yt_dlp/extractor/packtpub.py @@ -63,7 +63,7 @@ class PacktPubIE(PacktPubBaseIE): headers['Authorization'] = 'Bearer ' + self._TOKEN try: video_url = self._download_json( - 'https://services.packtpub.com/products-v1/products/%s/%s/%s' % (course_id, chapter_id, video_id), video_id, + f'https://services.packtpub.com/products-v1/products/{course_id}/{chapter_id}/{video_id}', video_id, 'Downloading JSON video', headers=headers)['data'] except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 400: @@ -109,17 +109,16 @@ class PacktPubCourseIE(PacktPubBaseIE): @classmethod def suitable(cls, url): - return False if PacktPubIE.suitable(url) else super( - PacktPubCourseIE, cls).suitable(url) + return False if 
PacktPubIE.suitable(url) else super().suitable(url) def _real_extract(self, url): mobj = self._match_valid_url(url) url, course_id = mobj.group('url', 'id') course = self._download_json( - self._STATIC_PRODUCTS_BASE + '%s/toc' % course_id, course_id) + self._STATIC_PRODUCTS_BASE + f'{course_id}/toc', course_id) metadata = self._download_json( - self._STATIC_PRODUCTS_BASE + '%s/summary' % course_id, + self._STATIC_PRODUCTS_BASE + f'{course_id}/summary', course_id, fatal=False) or {} entries = [] diff --git a/yt_dlp/extractor/palcomp3.py b/yt_dlp/extractor/palcomp3.py index 4b0801c..138a785 100644 --- a/yt_dlp/extractor/palcomp3.py +++ b/yt_dlp/extractor/palcomp3.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, str_or_none, @@ -30,7 +29,7 @@ class PalcoMP3BaseIE(InfoExtractor): })['data'] def _parse_music(self, music): - music_id = compat_str(music['musicID']) + music_id = str(music['musicID']) title = music['title'] formats = [] @@ -77,12 +76,12 @@ class PalcoMP3IE(PalcoMP3BaseIE): 'title': 'Nossas Composições - CUIDA BEM DELA', 'duration': 210, 'view_count': int, - } + }, }] @classmethod def suitable(cls, url): - return False if PalcoMP3VideoIE.suitable(url) else super(PalcoMP3IE, cls).suitable(url) + return False if PalcoMP3VideoIE.suitable(url) else super().suitable(url) class PalcoMP3ArtistIE(PalcoMP3BaseIE): @@ -106,7 +105,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE): @classmethod def suitable(cls, url): - return False if PalcoMP3IE._match_valid_url(url) else super(PalcoMP3ArtistIE, cls).suitable(url) + return False if PalcoMP3IE._match_valid_url(url) else super().suitable(url) def _real_extract(self, url): artist_slug = self._match_id(url) @@ -134,7 +133,7 @@ class PalcoMP3VideoIE(PalcoMP3BaseIE): 'upload_date': '20161107', 'uploader_id': 'maiaramaraisaoficial', 'uploader': 'Maiara e Maraisa', - } + }, }] _MUSIC_FIELDS = 'youtubeID' diff --git a/yt_dlp/extractor/panopto.py 
b/yt_dlp/extractor/panopto.py index 6b25962..91f1055 100644 --- a/yt_dlp/extractor/panopto.py +++ b/yt_dlp/extractor/panopto.py @@ -3,9 +3,9 @@ import datetime as dt import functools import json import random +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse, compat_urlparse from ..utils import ( ExtractorError, OnDemandPagedList, @@ -44,7 +44,7 @@ class PanoptoBaseIE(InfoExtractor): 18: 'hu-HU', 19: 'nb-NO', 20: 'sv-SE', - 21: 'it-IT' + 21: 'it-IT', } def _call_api(self, base_url, path, video_id, data=None, fatal=True, **kwargs): @@ -66,7 +66,7 @@ class PanoptoBaseIE(InfoExtractor): @staticmethod def _parse_fragment(url): - return {k: json.loads(v[0]) for k, v in compat_urlparse.parse_qs(compat_urllib_parse_urlparse(url).fragment).items()} + return {k: json.loads(v[0]) for k, v in urllib.parse.parse_qs(urllib.parse.urlparse(url).fragment).items()} class PanoptoIE(PanoptoBaseIE): @@ -88,7 +88,7 @@ class PanoptoIE(PanoptoBaseIE): 'average_rating': int, 'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e', 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a', - 'channel': 'Showcase Videos' + 'channel': 'Showcase Videos', }, }, { @@ -131,7 +131,7 @@ class PanoptoIE(PanoptoBaseIE): 'uploader': 'Kathryn Kelly', 'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3', 'channel': 'Getting Started', - } + }, }, { # Does not allow normal Viewer.aspx. AUDIO livestream has no url, so should be skipped and only give one stream. 
@@ -174,7 +174,7 @@ class PanoptoIE(PanoptoBaseIE): 'chapters': 'count:28', 'thumbnail': r're:https://demo\.hosted\.panopto\.com/.+', }, - 'params': {'format': 'mhtml', 'skip_download': True} + 'params': {'format': 'mhtml', 'skip_download': True}, }, { 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=8285224a-9a2b-4957-84f2-acb0000c4ea9', @@ -196,7 +196,7 @@ class PanoptoIE(PanoptoBaseIE): 'subtitles': {'en-US': [{'ext': 'srt', 'data': 'md5:a3f4d25963fdeace838f327097c13265'}], 'es-ES': [{'ext': 'srt', 'data': 'md5:57e9dad365fd0fbaf0468eac4949f189'}]}, }, - 'params': {'writesubtitles': True, 'skip_download': True} + 'params': {'writesubtitles': True, 'skip_download': True}, }, { # On Panopto there are two subs: "Default" and en-US. en-US is blank and should be skipped. 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=940cbd41-f616-4a45-b13e-aaf1000c915b', @@ -218,15 +218,15 @@ class PanoptoIE(PanoptoBaseIE): 'upload_date': '20191129', }, - 'params': {'writesubtitles': True, 'skip_download': True} + 'params': {'writesubtitles': True, 'skip_download': True}, }, { 'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638', - 'only_matching': True + 'only_matching': True, }, ] @@ -254,7 +254,7 @@ class PanoptoIE(PanoptoBaseIE): 'StopReason': None, 'StreamID': stream_id, 'TimeStamp': timestamp_str, - 'UpdatesRejected': 0 + 'UpdatesRejected': 0, }, ]} @@ -274,7 +274,7 @@ class PanoptoIE(PanoptoBaseIE): chapters.append({ 'start_time': start, 'end_time': start + duration, - 'title': caption + 'title': caption, }) return chapters @@ -283,11 +283,11 @@ class PanoptoIE(PanoptoBaseIE): image_frags = {} for timestamp in timestamps or []: duration = timestamp.get('Duration') - obj_id, obj_sn = 
timestamp.get('ObjectIdentifier'), timestamp.get('ObjectSequenceNumber'), + obj_id, obj_sn = timestamp.get('ObjectIdentifier'), timestamp.get('ObjectSequenceNumber') if timestamp.get('EventTargetType') == 'PowerPoint' and obj_id is not None and obj_sn is not None: image_frags.setdefault('slides', []).append({ 'url': base_url + f'/Pages/Viewer/Image.aspx?id={obj_id}&number={obj_sn}', - 'duration': duration + 'duration': duration, }) obj_pid, session_id, abs_time = timestamp.get('ObjectPublicIdentifier'), timestamp.get('SessionID'), timestamp.get('AbsoluteTime') @@ -304,7 +304,7 @@ class PanoptoIE(PanoptoBaseIE): 'acodec': 'none', 'vcodec': 'none', 'url': 'about:invalid', - 'fragments': fragments + 'fragments': fragments, } @staticmethod @@ -329,8 +329,8 @@ class PanoptoIE(PanoptoBaseIE): 'deliveryId': video_id, 'getCaptions': True, 'language': str(lang), - 'responseType': 'json' - } + 'responseType': 'json', + }, ) if not isinstance(response, list): continue @@ -359,12 +359,12 @@ class PanoptoIE(PanoptoBaseIE): subtitles = self._merge_subtitles(subtitles, stream_subtitles) else: stream_formats.append({ - 'url': stream_url + 'url': stream_url, }) for fmt in stream_formats: fmt.update({ 'format_note': stream.get('Tag'), - **fmt_kwargs + **fmt_kwargs, }) formats.extend(stream_formats) @@ -384,7 +384,7 @@ class PanoptoIE(PanoptoBaseIE): 'isKollectiveAgentInstalled': 'false', 'isEmbed': 'false', 'responseType': 'json', - } + }, ) delivery = delivery_info['Delivery'] @@ -421,7 +421,7 @@ class PanoptoIE(PanoptoBaseIE): 'channel_id': delivery.get('SessionGroupPublicID'), 'channel': traverse_obj(delivery, 'SessionGroupLongName', 'SessionGroupShortName', get_all=False), 'formats': formats, - 'subtitles': subtitles + 'subtitles': subtitles, } @@ -435,7 +435,7 @@ class PanoptoPlaylistIE(PanoptoBaseIE): 'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36', 'description': '', }, - 'playlist_mincount': 36 + 'playlist_mincount': 36, }, { 'url': 
'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190', @@ -444,7 +444,7 @@ class PanoptoPlaylistIE(PanoptoBaseIE): 'id': 'e2900555-3ad4-4bdb-854d-ad2401686190', 'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb', }, - 'playlist_mincount': 4 + 'playlist_mincount': 4, }, ] @@ -466,7 +466,7 @@ class PanoptoPlaylistIE(PanoptoBaseIE): 'description': item.get('Description'), 'duration': item.get('Duration'), 'channel': traverse_obj(item, ('Parent', 'Name')), - 'channel_id': traverse_obj(item, ('Parent', 'Id')) + 'channel_id': traverse_obj(item, ('Parent', 'Id')), } def _real_extract(self, url): @@ -475,7 +475,7 @@ class PanoptoPlaylistIE(PanoptoBaseIE): video_id = get_first(parse_qs(url), 'id') if video_id: if self.get_param('noplaylist'): - self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + self.to_screen(f'Downloading just video {video_id} because of --no-playlist') return self.url_result(base_url + f'/Pages/Viewer.aspx?id={video_id}', ie_key=PanoptoIE.ie_key(), video_id=video_id) else: self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}') @@ -495,28 +495,28 @@ class PanoptoListIE(PanoptoBaseIE): 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22', 'info_dict': { 'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a', - 'title': 'Showcase Videos' + 'title': 'Showcase Videos', }, - 'playlist_mincount': 140 + 'playlist_mincount': 140, }, { 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250', 'info_dict': { 'id': 'panopto_list', - 'title': 'panopto_list' + 'title': 'panopto_list', }, - 'playlist_mincount': 300 + 'playlist_mincount': 300, }, { # Folder that contains 8 folders and a playlist 'url': 
'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22', 'info_dict': { 'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e', - 'title': 'Video Tutorials' + 'title': 'Video Tutorials', }, - 'playlist_mincount': 9 - } + 'playlist_mincount': 9, + }, ] @@ -559,7 +559,7 @@ class PanoptoListIE(PanoptoBaseIE): base_url, '/Services/Data.svc/GetFolderInfo', folder_id, data={'folderID': folder_id}, fatal=False) return { - 'title': get_first(response, 'Name') + 'title': get_first(response, 'Name'), } def _real_extract(self, url): diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py index 3f19803..317f53b 100644 --- a/yt_dlp/extractor/paramountplus.py +++ b/yt_dlp/extractor/paramountplus.py @@ -170,19 +170,19 @@ class ParamountPlusSeriesIE(InfoExtractor): 'playlist_mincount': 50, 'info_dict': { 'id': 'drake-josh', - } + }, }, { 'url': 'https://www.paramountplus.com/shows/hawaii_five_0/', 'playlist_mincount': 240, 'info_dict': { 'id': 'hawaii_five_0', - } + }, }, { 'url': 'https://www.paramountplus.com/shows/spongebob-squarepants/', 'playlist_mincount': 248, 'info_dict': { 'id': 'spongebob-squarepants', - } + }, }] def _entries(self, show_name): @@ -193,7 +193,7 @@ class ParamountPlusSeriesIE(InfoExtractor): return for episode in show_json['result']['data']: yield self.url_result( - 'https://www.paramountplus.com%s' % episode['url'], + 'https://www.paramountplus.com{}'.format(episode['url']), ie=ParamountPlusIE.ie_key(), video_id=episode['content_id']) def _real_extract(self, url): diff --git a/yt_dlp/extractor/parler.py b/yt_dlp/extractor/parler.py index 563012f..9be288a 100644 --- a/yt_dlp/extractor/parler.py +++ b/yt_dlp/extractor/parler.py @@ -87,5 +87,5 @@ class ParlerIE(InfoExtractor): 'view_count': ('views', {int_or_none}), 'comment_count': ('total_comments', {int_or_none}), 'repost_count': ('echos', {int_or_none}), - }) + }), } diff --git 
a/yt_dlp/extractor/parlview.py b/yt_dlp/extractor/parlview.py index 777b008..b93b5ed 100644 --- a/yt_dlp/extractor/parlview.py +++ b/yt_dlp/extractor/parlview.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, try_get, @@ -24,7 +23,7 @@ class ParlviewIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'https://parlview.aph.gov.au/mediaPlayer.php?videoID=539936', 'only_matching': True, @@ -36,13 +35,13 @@ class ParlviewIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) media = self._download_json(self._API_URL % video_id, video_id).get('media') - timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], compat_str) or '/' + timestamp = try_get(media, lambda x: x['timeMap']['source']['timecode_offsets'][0], str) or '/' stream = try_get(media, lambda x: x['renditions'][0], dict) if not stream: self.raise_no_formats('No streams were detected') elif stream.get('streamType') != 'VOD': - self.raise_no_formats('Unknown type of stream was detected: "%s"' % str(stream.get('streamType'))) + self.raise_no_formats('Unknown type of stream was detected: "{}"'.format(str(stream.get('streamType')))) formats = self._extract_m3u8_formats(stream['url'], video_id, 'mp4', 'm3u8_native') media_info = self._download_webpage( diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 6c441ff..7d6e843 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -2,6 +2,7 @@ import itertools import urllib.parse from .common import InfoExtractor +from .sproutvideo import VidsIoIE from .vimeo import VimeoIE from ..networking.exceptions import HTTPError from ..utils import ( @@ -12,6 +13,7 @@ from ..utils import ( int_or_none, mimetype2ext, parse_iso8601, + smuggle_url, str_or_none, traverse_obj, url_or_none, @@ -33,7 +35,7 @@ class PatreonBaseIE(InfoExtractor): try: return self._download_json( 
f'https://www.patreon.com/api/{ep}', - item_id, note='Downloading API JSON' if not note else note, + item_id, note=note if note else 'Downloading API JSON', query=query, fatal=fatal, headers=headers) except ExtractorError as e: if not isinstance(e.cause, HTTPError) or mimetype2ext(e.cause.response.headers.get('Content-Type')) != 'json': @@ -113,7 +115,7 @@ class PatreonIE(PatreonBaseIE): 'params': { 'noplaylist': True, 'skip_download': True, - } + }, }, { 'url': 'https://www.patreon.com/posts/episode-166-of-743933', 'only_matching': True, @@ -133,7 +135,7 @@ class PatreonIE(PatreonBaseIE): 'description': 'md5:557a409bd79d3898689419094934ba79', 'uploader_id': '14936315', }, - 'skip': 'Patron-only content' + 'skip': 'Patron-only content', }, { # m3u8 video (https://github.com/yt-dlp/yt-dlp/issues/2277) 'url': 'https://www.patreon.com/posts/video-sketchbook-32452882', @@ -154,7 +156,7 @@ class PatreonIE(PatreonBaseIE): 'channel_id': '1641751', 'channel_url': 'https://www.patreon.com/loish', 'channel_follower_count': int, - } + }, }, { # bad videos under media (if media is included). 
Real one is under post_file 'url': 'https://www.patreon.com/posts/premium-access-70282931', @@ -305,22 +307,28 @@ class PatreonIE(PatreonBaseIE): 'channel_follower_count': ('attributes', 'patron_count', {int_or_none}), })) + # all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo + headers = {'referer': 'https://patreon.com/'} + # handle Vimeo embeds if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo': v_url = urllib.parse.unquote(self._html_search_regex( r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '') if url_or_none(v_url) and self._request_webpage( - v_url, video_id, 'Checking Vimeo embed URL', - headers={'Referer': 'https://patreon.com/'}, - fatal=False, errnote=False): + v_url, video_id, 'Checking Vimeo embed URL', headers=headers, + fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection entries.append(self.url_result( VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'), VimeoIE, url_transparent=True)) embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none})) - if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False): - entries.append(self.url_result(embed_url)) + if embed_url and (urlh := self._request_webpage( + embed_url, video_id, 'Checking embed URL', headers=headers, + fatal=False, errnote=False, expected_status=403)): + # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie + if urlh.status != 403 or VidsIoIE.suitable(embed_url): + entries.append(self.url_result(smuggle_url(embed_url, headers))) post_file = traverse_obj(attributes, ('post_file', {dict})) if post_file: @@ -378,7 +386,7 @@ class PatreonIE(PatreonBaseIE): params.update({'page[cursor]': cursor} if cursor else {}) response = self._call_api( - f'posts/{post_id}/comments', post_id, query=params, note='Downloading comments 
page %d' % page) + f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}') cursor = None for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)): @@ -446,18 +454,18 @@ class PatreonCampaignIE(PatreonBaseIE): 'uploader_id': '37306634', 'thumbnail': r're:^https?://.*$', }, - 'playlist_mincount': 71 + 'playlist_mincount': 71, }, { 'url': 'https://www.patreon.com/dissonancepod/posts', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.patreon.com/m/5932659', - 'only_matching': True + 'only_matching': True, }] @classmethod def suitable(cls, url): - return False if PatreonIE.suitable(url) else super(PatreonCampaignIE, cls).suitable(url) + return False if PatreonIE.suitable(url) else super().suitable(url) def _entries(self, campaign_id): cursor = None @@ -472,7 +480,7 @@ class PatreonCampaignIE(PatreonBaseIE): for page in itertools.count(1): params.update({'page[cursor]': cursor} if cursor else {}) - posts_json = self._call_api('posts', campaign_id, query=params, note='Downloading posts page %d' % page) + posts_json = self._call_api('posts', campaign_id, query=params, note=f'Downloading posts page {page}') cursor = traverse_obj(posts_json, ('meta', 'pagination', 'cursors', 'next')) for post_url in traverse_obj(posts_json, ('data', ..., 'attributes', 'patreon_url')): @@ -486,13 +494,14 @@ class PatreonCampaignIE(PatreonBaseIE): campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity') if campaign_id is None: webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT}) - campaign_id = self._search_regex(r'https://www.patreon.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID') + campaign_id = self._search_nextjs_data( + webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id'] params = { 'json-api-use-default-includes': 'false', 'fields[user]': 'full_name,url', 
'fields[campaign]': 'name,summary,url,patron_count,creation_count,is_nsfw,avatar_photo_url', - 'include': 'creator' + 'include': 'creator', } campaign_response = self._call_api( diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index f6f5a5c..6867964 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( US_RATINGS, ExtractorError, @@ -182,18 +181,18 @@ class PBSIE(InfoExtractor): ) IE_NAME = 'pbs' - IE_DESC = 'Public Broadcasting Service (PBS) and member stations: %s' % ', '.join(list(zip(*_STATIONS))[1]) + IE_DESC = 'Public Broadcasting Service (PBS) and member stations: {}'.format(', '.join(list(zip(*_STATIONS))[1])) _VALID_URL = r'''(?x)https?:// (?: # Direct video URL - (?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) | + (?:{})/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) | # Article with embedded player (or direct video) - (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) | + (?:www\.)?pbs\.org/(?:[^/]+/){{1,5}}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+) ) - ''' % '|'.join(list(zip(*_STATIONS))[0]) + '''.format('|'.join(next(zip(*_STATIONS)))) _GEO_COUNTRIES = ['US'] @@ -415,7 +414,7 @@ class PBSIE(InfoExtractor): { 'url': 'https://player.pbs.org/portalplayer/3004638221/?uid=', 'only_matching': True, - } + }, ] _ERRORS = { 101: 'We\'re sorry, but this video is not yet available.', @@ -518,7 +517,7 @@ class PBSIE(InfoExtractor): if not video_id: video_info = self._extract_video_data( player_page, 'video data', display_id) - video_id = compat_str( + video_id = str( video_info.get('id') or video_info['contentID']) else: video_id = mobj.group('id') @@ -539,7 +538,7 @@ class PBSIE(InfoExtractor): if isinstance(video_id, list): entries = [self.url_result( - 
'http://video.pbs.org/video/%s' % vid_id, 'PBS', vid_id) + f'http://video.pbs.org/video/{vid_id}', 'PBS', vid_id) for vid_id in video_id] return self.playlist_result(entries, display_id) @@ -568,11 +567,11 @@ class PBSIE(InfoExtractor): # Player pages may also serve different qualities for page in ('widget/partnerplayer', 'portalplayer'): player = self._download_webpage( - 'http://player.pbs.org/%s/%s' % (page, video_id), - display_id, 'Downloading %s page' % page, fatal=False) + f'http://player.pbs.org/{page}/{video_id}', + display_id, f'Downloading {page} page', fatal=False) if player: video_info = self._extract_video_data( - player, '%s video data' % page, display_id, fatal=False) + player, f'{page} video data', display_id, fatal=False) if video_info: extract_redirect_urls(video_info) if not info: @@ -603,7 +602,7 @@ class PBSIE(InfoExtractor): redirect_id = redirect.get('eeid') redirect_info = self._download_json( - '%s?format=json' % redirect['url'], display_id, + '{}?format=json'.format(redirect['url']), display_id, 'Downloading %s video url info' % (redirect_id or num), headers=self.geo_verification_headers()) @@ -614,7 +613,7 @@ class PBSIE(InfoExtractor): self.raise_geo_restricted( msg=message, countries=self._GEO_COUNTRIES) raise ExtractorError( - '%s said: %s' % (self.IE_NAME, message), expected=True) + f'{self.IE_NAME} said: {message}', expected=True) format_url = redirect_info.get('url') if not format_url: @@ -649,7 +648,7 @@ class PBSIE(InfoExtractor): f_url = re.sub(r'\d+k|baseline', bitrate + 'k', http_url) # This may produce invalid links sometimes (e.g. 
# http://www.pbs.org/wgbh/frontline/film/suicide-plan) - if not self._is_valid_url(f_url, display_id, 'http-%sk video' % bitrate): + if not self._is_valid_url(f_url, display_id, f'http-{bitrate}k video'): continue f = m3u8_format.copy() f.update({ @@ -671,7 +670,7 @@ class PBSIE(InfoExtractor): captions = info.get('cc') or {} for caption_url in captions.values(): subtitles.setdefault('en', []).append({ - 'url': caption_url + 'url': caption_url, }) subtitles = self._merge_subtitles(subtitles, hls_subs) @@ -715,7 +714,7 @@ class PBSKidsIE(InfoExtractor): 'description': 'md5:d006b2211633685d8ebc8d03b6d5611e', 'categories': ['Episode'], 'upload_date': '20190718', - } + }, }, { 'url': 'https://pbskids.org/video/plum-landing/2365205059', @@ -730,8 +729,8 @@ class PBSKidsIE(InfoExtractor): 'description': 'md5:657e5fc4356a84ead1c061eb280ff05d', 'categories': ['Episode'], 'upload_date': '20140302', - } - } + }, + }, ] def _real_extract(self, url): @@ -753,5 +752,5 @@ class PBSKidsIE(InfoExtractor): 'series': ('video_obj', 'program_title', {str}), 'title': ('video_obj', 'title', {str}), 'upload_date': ('video_obj', 'air_date', {unified_strdate}), - }) + }), } diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py index 086eaaf..4bea04b 100644 --- a/yt_dlp/extractor/pearvideo.py +++ b/yt_dlp/extractor/pearvideo.py @@ -19,7 +19,7 @@ class PearVideoIE(InfoExtractor): 'description': 'md5:01d576b747de71be0ee85eb7cac25f9d', 'timestamp': 1494275280, 'upload_date': '20170508', - } + }, } def _real_extract(self, url): @@ -43,7 +43,7 @@ class PearVideoIE(InfoExtractor): query={'contId': video_id}, headers={'Referer': url}) formats = [{ 'format_id': k, - 'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v + 'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v, } for k, v in traverse_obj(info, ('videoInfo', 'videos'), default={}).items() if v] title = self._search_regex( diff --git 
a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index fb4d025..2b69c7e 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -2,7 +2,6 @@ import functools import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( OnDemandPagedList, format_field, @@ -1316,13 +1315,13 @@ class PeerTubeIE(InfoExtractor): )''' _UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' _API_BASE = 'https://%s/api/v1/videos/%s/%s' - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) (?: peertube:(?P<host>[^:]+):| - https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos|w)/ + https?://(?P<host_2>{_INSTANCES_RE})/(?:videos/(?:watch|embed)|api/v\d/videos|w)/ ) - (?P<id>%s) - ''' % (_INSTANCES_RE, _UUID_RE) + (?P<id>{_UUID_RE}) + ''' _EMBED_REGEX = [r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//{_INSTANCES_RE}/videos/embed/{cls._UUID_RE})'''] _TESTS = [{ 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d', @@ -1349,7 +1348,7 @@ class PeerTubeIE(InfoExtractor): 'dislike_count': int, 'tags': ['framasoft', 'peertube'], 'categories': ['Science & Technology'], - } + }, }, { 'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e', 'info_dict': { @@ -1360,7 +1359,7 @@ class PeerTubeIE(InfoExtractor): 'timestamp': 1589276219, 'upload_date': '20200512', 'uploader': 'chocobozzz', - } + }, }, { 'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd', 'info_dict': { @@ -1394,7 +1393,7 @@ class PeerTubeIE(InfoExtractor): 'timestamp': 1587401293, 'upload_date': '20200420', 'uploader': 'Drew DeVault', - } + }, }, { 'url': 'https://peertube.debian.social/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44', 'only_matching': True, @@ -1416,14 +1415,13 @@ class PeerTubeIE(InfoExtractor): @staticmethod def _extract_peertube_url(webpage, source_url): mobj = re.match( - 
r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|w)/(?P<id>%s)' - % PeerTubeIE._UUID_RE, source_url) + rf'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|w)/(?P<id>{PeerTubeIE._UUID_RE})', source_url) if mobj and any(p in webpage for p in ( 'meta property="og:platform" content="PeerTube"', '<title>PeerTube<', 'There will be other non JS-based clients to access PeerTube', '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')): - return 'peertube:%s:%s' % mobj.group('host', 'id') + return 'peertube:{}:{}'.format(*mobj.group('host', 'id')) @classmethod def _extract_embed_urls(cls, url, webpage): @@ -1451,8 +1449,8 @@ class PeerTubeIE(InfoExtractor): return subtitles = {} for e in data: - language_id = try_get(e, lambda x: x['language']['id'], compat_str) - caption_url = urljoin('https://%s' % host, e.get('captionPath')) + language_id = try_get(e, lambda x: x['language']['id'], str) + caption_url = urljoin(f'https://{host}', e.get('captionPath')) if not caption_url: continue subtitles.setdefault(language_id or 'en', []).append({ @@ -1491,7 +1489,7 @@ class PeerTubeIE(InfoExtractor): continue file_size = int_or_none(file_.get('size')) format_id = try_get( - file_, lambda x: x['resolution']['label'], compat_str) + file_, lambda x: x['resolution']['label'], str) f = parse_resolution(format_id) f.update({ 'url': file_url, @@ -1526,7 +1524,7 @@ class PeerTubeIE(InfoExtractor): def channel_data(field, type_): return data('channel', field, type_) - category = data('category', 'label', compat_str) + category = data('category', 'label', str) categories = [category] if category else None nsfw = video.get('nsfw') @@ -1535,7 +1533,7 @@ class PeerTubeIE(InfoExtractor): else: age_limit = None - webpage_url = 'https://%s/videos/watch/%s' % (host, video_id) + webpage_url = f'https://{host}/videos/watch/{video_id}' return { 'id': video_id, @@ -1543,14 +1541,14 @@ class PeerTubeIE(InfoExtractor): 'description': description, 'thumbnail': 
urljoin(webpage_url, video.get('thumbnailPath')), 'timestamp': unified_timestamp(video.get('publishedAt')), - 'uploader': account_data('displayName', compat_str), + 'uploader': account_data('displayName', str), 'uploader_id': str_or_none(account_data('id', int)), - 'uploader_url': url_or_none(account_data('url', compat_str)), - 'channel': channel_data('displayName', compat_str), + 'uploader_url': url_or_none(account_data('url', str)), + 'channel': channel_data('displayName', str), 'channel_id': str_or_none(channel_data('id', int)), - 'channel_url': url_or_none(channel_data('url', compat_str)), - 'language': data('language', 'id', compat_str), - 'license': data('licence', 'label', compat_str), + 'channel_url': url_or_none(channel_data('url', str)), + 'language': data('language', 'id', str), + 'license': data('licence', 'label', str), 'duration': int_or_none(video.get('duration')), 'view_count': int_or_none(video.get('views')), 'like_count': int_or_none(video.get('likes')), @@ -1573,9 +1571,9 @@ class PeerTubePlaylistIE(InfoExtractor): 'w/p': 'video-playlists', } _VALID_URL = r'''(?x) - https?://(?P<host>%s)/(?P<type>(?:%s))/ + https?://(?P<host>{})/(?P<type>(?:{}))/ (?P<id>[^/]+) - ''' % (PeerTubeIE._INSTANCES_RE, '|'.join(_TYPES.keys())) + '''.format(PeerTubeIE._INSTANCES_RE, '|'.join(_TYPES.keys())) _TESTS = [{ 'url': 'https://peertube.debian.social/w/p/hFdJoTuyhNJVa1cDWd1d12', 'info_dict': { @@ -1617,21 +1615,21 @@ class PeerTubePlaylistIE(InfoExtractor): return self._download_json( self._API_BASE % (host, base, name, path), name, **kwargs) - def fetch_page(self, host, id, type, page): + def fetch_page(self, host, playlist_id, playlist_type, page): page += 1 video_data = self.call_api( - host, id, + host, playlist_id, f'/videos?sort=-createdAt&start={self._PAGE_SIZE * (page - 1)}&count={self._PAGE_SIZE}&nsfw=both', - type, note=f'Downloading page {page}').get('data', []) + playlist_type, note=f'Downloading page {page}').get('data', []) for video in video_data: - 
shortUUID = video.get('shortUUID') or try_get(video, lambda x: x['video']['shortUUID']) + short_uuid = video.get('shortUUID') or try_get(video, lambda x: x['video']['shortUUID']) video_title = video.get('name') or try_get(video, lambda x: x['video']['name']) yield self.url_result( - f'https://{host}/w/{shortUUID}', PeerTubeIE.ie_key(), - video_id=shortUUID, video_title=video_title) + f'https://{host}/w/{short_uuid}', PeerTubeIE.ie_key(), + video_id=short_uuid, video_title=video_title) - def _extract_playlist(self, host, type, id): - info = self.call_api(host, id, '', type, note='Downloading playlist information', fatal=False) + def _extract_playlist(self, host, playlist_type, playlist_id): + info = self.call_api(host, playlist_id, '', playlist_type, note='Downloading playlist information', fatal=False) playlist_title = info.get('displayName') playlist_description = info.get('description') @@ -1641,13 +1639,12 @@ class PeerTubePlaylistIE(InfoExtractor): thumbnail = format_field(info, 'thumbnailPath', f'https://{host}%s') entries = OnDemandPagedList(functools.partial( - self.fetch_page, host, id, type), self._PAGE_SIZE) + self.fetch_page, host, playlist_id, playlist_type), self._PAGE_SIZE) return self.playlist_result( - entries, id, playlist_title, playlist_description, + entries, playlist_id, playlist_title, playlist_description, timestamp=playlist_timestamp, channel=channel, channel_id=channel_id, thumbnail=thumbnail) def _real_extract(self, url): - type, host, id = self._match_valid_url(url).group('type', 'host', 'id') - type = self._TYPES[type] - return self._extract_playlist(host, type, id) + playlist_type, host, playlist_id = self._match_valid_url(url).group('type', 'host', 'id') + return self._extract_playlist(host, self._TYPES[playlist_type], playlist_id) diff --git a/yt_dlp/extractor/peertv.py b/yt_dlp/extractor/peertv.py index a709e21..726d5e1 100644 --- a/yt_dlp/extractor/peertv.py +++ b/yt_dlp/extractor/peertv.py @@ -48,5 +48,5 @@ class 
PeerTVIE(InfoExtractor): 'title': self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title').replace('\xa0', ' '), 'formats': formats, 'description': self._html_search_meta(('og:description', 'description'), webpage), - 'thumbnail': self._html_search_meta(('og:image', 'image'), webpage) + 'thumbnail': self._html_search_meta(('og:image', 'image'), webpage), } diff --git a/yt_dlp/extractor/peloton.py b/yt_dlp/extractor/peloton.py index 7864299..5999d4a 100644 --- a/yt_dlp/extractor/peloton.py +++ b/yt_dlp/extractor/peloton.py @@ -36,12 +36,12 @@ class PelotonIE(InfoExtractor): 'chapters': 'count:1', 'subtitles': {'en': [{ 'url': r're:^https?://.+', - 'ext': 'vtt' + 'ext': 'vtt', }]}, }, 'params': { 'skip_download': 'm3u8', }, - '_skip': 'Account needed' + 'skip': 'Account needed', }, { 'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8', 'info_dict': { @@ -57,11 +57,11 @@ class PelotonIE(InfoExtractor): 'duration': 1802, 'categories': ['Running'], 'is_live': False, - 'chapters': 'count:3' + 'chapters': 'count:3', }, 'params': { 'skip_download': 'm3u8', }, - '_skip': 'Account needed' + 'skip': 'Account needed', }] _MANIFEST_URL_TEMPLATE = '%s?hdnea=%s' @@ -79,7 +79,7 @@ class PelotonIE(InfoExtractor): data=json.dumps({ 'username_or_email': username, 'password': password, - 'with_pubsub': False + 'with_pubsub': False, }).encode(), headers={'Content-Type': 'application/json', 'User-Agent': 'web'}) except ExtractorError as e: @@ -115,7 +115,7 @@ class PelotonIE(InfoExtractor): else: raise - metadata = self._download_json('https://api.onepeloton.com/api/ride/%s/details?stream_source=multichannel' % video_id, video_id) + metadata = self._download_json(f'https://api.onepeloton.com/api/ride/{video_id}/details?stream_source=multichannel', video_id) ride_data = metadata.get('ride') if not ride_data: raise ExtractorError('Missing stream metadata') @@ -133,7 +133,7 @@ class PelotonIE(InfoExtractor): subtitles = {} else: if 
ride_data.get('vod_stream_url'): - url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % ( + url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles={}&url={}?hdnea={}'.format( ','.join([re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption) for caption in ride_data['captions']]), ride_data['vod_stream_url'], urllib.parse.quote(urllib.parse.quote(token))) @@ -147,14 +147,14 @@ class PelotonIE(InfoExtractor): if metadata.get('instructor_cues'): subtitles['cues'] = [{ 'data': json.dumps(metadata.get('instructor_cues')), - 'ext': 'json' + 'ext': 'json', }] category = ride_data.get('fitness_discipline_display_name') chapters = [{ 'start_time': segment.get('start_time_offset'), 'end_time': segment.get('start_time_offset') + segment.get('length'), - 'title': segment.get('name') + 'title': segment.get('name'), } for segment in traverse_obj(metadata, ('segments', 'segment_list'))] return { @@ -171,7 +171,7 @@ class PelotonIE(InfoExtractor): 'categories': [category] if category else None, 'tags': traverse_obj(ride_data, ('equipment_tags', ..., 'name')), 'is_live': is_live, - 'chapters': chapters + 'chapters': chapters, } @@ -194,12 +194,12 @@ class PelotonLiveIE(InfoExtractor): 'duration': 2014, 'categories': ['Cycling'], 'is_live': False, - 'chapters': 'count:3' + 'chapters': 'count:3', }, 'params': { 'skip_download': 'm3u8', }, - '_skip': 'Account needed' + 'skip': 'Account needed', } def _real_extract(self, url): @@ -208,7 +208,7 @@ class PelotonLiveIE(InfoExtractor): if peloton.get('ride_id'): if not peloton.get('is_live') or peloton.get('is_encore') or peloton.get('status') != 'PRE_START': - return self.url_result('https://members.onepeloton.com/classes/player/%s' % peloton['ride_id']) + return self.url_result('https://members.onepeloton.com/classes/player/{}'.format(peloton['ride_id'])) else: raise ExtractorError('Ride has not started', expected=True) else: 
diff --git a/yt_dlp/extractor/performgroup.py b/yt_dlp/extractor/performgroup.py index f4d7f22..df726c9 100644 --- a/yt_dlp/extractor/performgroup.py +++ b/yt_dlp/extractor/performgroup.py @@ -1,5 +1,5 @@ from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import int_or_none, join_nonempty class PerformGroupIE(InfoExtractor): @@ -15,12 +15,12 @@ class PerformGroupIE(InfoExtractor): 'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b', 'timestamp': 1511533477, 'upload_date': '20171124', - } + }, }] def _call_api(self, service, auth_token, content_id, referer_url): return self._download_json( - 'http://ep3.performfeeds.com/ep%s/%s/%s/' % (service, auth_token, content_id), + f'http://ep3.performfeeds.com/ep{service}/{auth_token}/{content_id}/', content_id, headers={ 'Referer': referer_url, 'Origin': 'http://player.performgroup.com', @@ -50,11 +50,8 @@ class PerformGroupIE(InfoExtractor): if not c_url: continue tbr = int_or_none(c.get('bitrate'), 1000) - format_id = 'http' - if tbr: - format_id += '-%d' % tbr formats.append({ - 'format_id': format_id, + 'format_id': join_nonempty('http', tbr), 'url': c_url, 'tbr': tbr, 'width': int_or_none(c.get('width')), diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py index d2351df..e3b33c4 100644 --- a/yt_dlp/extractor/periscope.py +++ b/yt_dlp/extractor/periscope.py @@ -9,18 +9,18 @@ from ..utils.traversal import traverse_obj class PeriscopeBaseIE(InfoExtractor): _M3U8_HEADERS = { - 'Referer': 'https://www.periscope.tv/' + 'Referer': 'https://www.periscope.tv/', } def _call_api(self, method, query, item_id): return self._download_json( - 'https://api.periscope.tv/api/v2/%s' % method, + f'https://api.periscope.tv/api/v2/{method}', item_id, query=query) def _parse_broadcast_data(self, broadcast, video_id): title = broadcast.get('status') or 'Periscope Broadcast' uploader = broadcast.get('user_display_name') or broadcast.get('username') - title = '%s - %s' % (uploader, title) if 
uploader else title + title = f'{uploader} - {title}' if uploader else title thumbnails = [{ 'url': broadcast[image], } for image in ('image_url', 'image_url_medium', 'image_url_small') if broadcast.get(image)] @@ -40,7 +40,7 @@ class PeriscopeBaseIE(InfoExtractor): 'live_status': { 'running': 'is_live', 'not_started': 'is_upcoming', - }.get(traverse_obj(broadcast, ('state', {str.lower}))) or 'was_live' + }.get(traverse_obj(broadcast, ('state', {str.lower}))) or 'was_live', } @staticmethod @@ -165,7 +165,7 @@ class PeriscopeUserIE(PeriscopeBaseIE): webpage, 'data store', default='{}', group='data')), user_name) - user = list(data_store['UserCache']['users'].values())[0]['user'] + user = next(iter(data_store['UserCache']['users'].values()))['user'] user_id = user['id'] session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id'] @@ -182,7 +182,7 @@ class PeriscopeUserIE(PeriscopeBaseIE): entries = [ self.url_result( - 'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id)) + f'https://www.periscope.tv/{user_name}/{broadcast_id}') for broadcast_id in broadcast_ids] return self.playlist_result(entries, user_id, title, description) diff --git a/yt_dlp/extractor/philharmoniedeparis.py b/yt_dlp/extractor/philharmoniedeparis.py index e8494a0..310ea0f 100644 --- a/yt_dlp/extractor/philharmoniedeparis.py +++ b/yt_dlp/extractor/philharmoniedeparis.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import try_get @@ -48,7 +47,7 @@ class PhilharmonieDeParisIE(InfoExtractor): video_id = self._match_id(url) config = self._download_json( - 'https://otoplayer.philharmoniedeparis.fr/fr/config/%s.json' % video_id, video_id, query={ + f'https://otoplayer.philharmoniedeparis.fr/fr/config/{video_id}.json', video_id, query={ 'id': video_id, 'lang': 'fr-FR', }) @@ -66,7 +65,7 @@ class PhilharmonieDeParisIE(InfoExtractor): formats = [] for format_id in ('mobile', 'desktop'): format_url = try_get( - 
files, lambda x: x[format_id]['file'], compat_str) + files, lambda x: x[format_id]['file'], str) if not format_url or format_url in format_urls: continue format_urls.add(format_url) @@ -91,7 +90,7 @@ class PhilharmonieDeParisIE(InfoExtractor): entry = extract_entry(chapter) if entry is None: continue - entry['id'] = '%s-%d' % (video_id, num) + entry['id'] = f'{video_id}-{num}' entries.append(entry) return self.playlist_result(entries, video_id, config.get('title')) diff --git a/yt_dlp/extractor/phoenix.py b/yt_dlp/extractor/phoenix.py index 5fa133a..63c2560 100644 --- a/yt_dlp/extractor/phoenix.py +++ b/yt_dlp/extractor/phoenix.py @@ -2,7 +2,6 @@ import re from .youtube import YoutubeIE from .zdf import ZDFBaseIE -from ..compat import compat_str from ..utils import ( int_or_none, merge_dicts, @@ -64,7 +63,7 @@ class PhoenixIE(ZDFBaseIE): article_id = self._match_id(url) article = self._download_json( - 'https://www.phoenix.de/response/id/%s' % article_id, article_id, + f'https://www.phoenix.de/response/id/{article_id}', article_id, 'Downloading article JSON') video = article['absaetze'][0] @@ -76,7 +75,7 @@ class PhoenixIE(ZDFBaseIE): video_id, ie=YoutubeIE.ie_key(), video_id=video_id, video_title=title) - video_id = compat_str(video.get('basename') or video.get('content')) + video_id = str(video.get('basename') or video.get('content')) details = self._download_json( 'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php', @@ -91,7 +90,7 @@ class PhoenixIE(ZDFBaseIE): content_id = details['tracking']['nielsen']['content']['assetid'] info = self._extract_ptmd( - 'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id, + f'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/{content_id}', content_id, None, url) duration = int_or_none(try_get( @@ -99,7 +98,7 @@ class PhoenixIE(ZDFBaseIE): timestamp = unified_timestamp(details.get('editorialDate')) series = try_get( details, lambda x: 
x['tracking']['nielsen']['content']['program'], - compat_str) + str) episode = title if details.get('contentType') == 'episode' else None thumbnails = [] diff --git a/yt_dlp/extractor/photobucket.py b/yt_dlp/extractor/photobucket.py index 71e9a48..a7e5bc0 100644 --- a/yt_dlp/extractor/photobucket.py +++ b/yt_dlp/extractor/photobucket.py @@ -1,7 +1,7 @@ import json +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote class PhotobucketIE(InfoExtractor): @@ -16,7 +16,7 @@ class PhotobucketIE(InfoExtractor): 'upload_date': '20130504', 'uploader': 'rachaneronas', 'title': 'Tired of Link Building? Try BacklinkMyDomain.com!', - } + }, } def _real_extract(self, url): @@ -31,7 +31,7 @@ class PhotobucketIE(InfoExtractor): info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);', webpage, 'info json') info = json.loads(info_json) - url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url')) + url = urllib.parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url')) return { 'id': video_id, 'url': url, diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index 87d912d..72e3748 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( ExtractorError, clean_html, @@ -30,7 +31,7 @@ class PiaproIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.(?:png|jpg)$', 'upload_date': '20090901', 'view_count': int, - } + }, }, { 'note': 'There are break lines in description, mandating (?s) flag', 'url': 'https://piapro.jp/t/9cSd', @@ -47,13 +48,13 @@ class PiaproIE(InfoExtractor): 'view_count': int, 'thumbnail': r're:^https?://.*\.(?:png|jpg)$', 'uploader_id': 'cyankino', - } + }, }, { 'url': 'https://piapro.jp/content/hcw0z3a169wtemz6', - 'only_matching': True + 
'only_matching': True, }, { 'url': 'https://piapro.jp/t/-SO-', - 'only_matching': True + 'only_matching': True, }] _login_status = False @@ -64,7 +65,7 @@ class PiaproIE(InfoExtractor): '_username': username, '_password': password, '_remember_me': 'on', - 'login': 'ログイン' + 'login': 'ログイン', } self._request_webpage('https://piapro.jp/login/', None) urlh = self._request_webpage( @@ -74,7 +75,7 @@ class PiaproIE(InfoExtractor): if urlh is False: login_ok = False else: - parts = compat_urlparse.urlparse(urlh.url) + parts = urllib.parse.urlparse(urlh.url) if parts.path != '/': login_ok = False if not login_ok: diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py index d415ba2..726fe41 100644 --- a/yt_dlp/extractor/picarto.py +++ b/yt_dlp/extractor/picarto.py @@ -17,14 +17,14 @@ class PicartoIE(InfoExtractor): 'ext': 'mp4', 'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'timestamp': int, - 'is_live': True + 'is_live': True, }, 'skip': 'Stream is offline', } @classmethod def suitable(cls, url): - return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url) + return False if PicartoVodIE.suitable(url) else super().suitable(url) def _real_extract(self, url): channel_id = self._match_id(url) @@ -42,7 +42,7 @@ class PicartoIE(InfoExtractor): getLoadBalancerUrl(channel_name: "%s") { url } -}''' % (channel_id, channel_id), +}''' % (channel_id, channel_id), # noqa: UP031 })['data'] metadata = data['channel'] @@ -80,7 +80,7 @@ class PicartoIE(InfoExtractor): 'is_live': True, 'channel': channel_id, 'channel_id': metadata.get('id'), - 'channel_url': 'https://picarto.tv/%s' % channel_id, + 'channel_url': f'https://picarto.tv/{channel_id}', 'age_limit': age_limit, 'formats': formats, } @@ -95,7 +95,7 @@ class PicartoVodIE(InfoExtractor): 'id': 'ArtofZod_2017.12.12.00.13.23.flv', 'ext': 'mp4', 'title': 'ArtofZod_2017.12.12.00.13.23.flv', - 'thumbnail': r're:^https?://.*\.jpg' + 'thumbnail': r're:^https?://.*\.jpg', }, 'skip': 
'The VOD does not exist', }, { @@ -108,7 +108,7 @@ class PicartoVodIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg', 'channel': 'ArtofZod', 'age_limit': 18, - } + }, }, { 'url': 'https://picarto.tv/videopopout/Plague', 'only_matching': True, @@ -130,7 +130,7 @@ class PicartoVodIE(InfoExtractor): name }} }} -}}''' +}}''', })['data']['video'] file_name = data['file_name'] diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py index 02ae2fe..ceb6562 100644 --- a/yt_dlp/extractor/piksel.py +++ b/yt_dlp/extractor/piksel.py @@ -45,7 +45,7 @@ class PikselIE(InfoExtractor): 'upload_date': '20161210', 'description': '', 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1488331553/3238987.jpg?w=640&h=480', - } + }, }, { # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al @@ -59,13 +59,13 @@ class PikselIE(InfoExtractor): 'timestamp': 1486171129, 'upload_date': '20170204', 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1495569155/3279887.jpg?w=640&h=360', - } + }, }, { # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/ 'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477', 'only_matching': True, - } + }, ] def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.tech', fatal=True): @@ -84,7 +84,7 @@ class PikselIE(InfoExtractor): webpage = self._download_webpage(url, display_id) app_token = self._search_regex([ r'clientAPI\s*:\s*"([^"]+)"', - r'data-de-api-key\s*=\s*"([^"]+)"' + r'data-de-api-key\s*=\s*"([^"]+)"', ], webpage, 'app token') query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id} program = self._call_api( diff --git a/yt_dlp/extractor/pinkbike.py b/yt_dlp/extractor/pinkbike.py index e4e1caa..0cd9632 100644 --- a/yt_dlp/extractor/pinkbike.py +++ b/yt_dlp/extractor/pinkbike.py @@ -27,7 +27,7 @@ class PinkbikeIE(InfoExtractor): 'location': 'Victoria, British Columbia, 
Canada', 'view_count': int, 'comment_count': int, - } + }, }, { 'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629', 'only_matching': True, @@ -37,7 +37,7 @@ class PinkbikeIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.pinkbike.com/video/%s' % video_id, video_id) + f'http://www.pinkbike.com/video/{video_id}', video_id) formats = [] for _, format_id, src in re.findall( @@ -72,7 +72,7 @@ class PinkbikeIE(InfoExtractor): def extract_count(webpage, label): return str_to_int(self._search_regex( - r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label, + rf'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>{label}', webpage, label, fatal=False)) view_count = extract_count(webpage, 'Views') @@ -89,5 +89,5 @@ class PinkbikeIE(InfoExtractor): 'location': location, 'view_count': view_count, 'comment_count': comment_count, - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/pinterest.py b/yt_dlp/extractor/pinterest.py index 8361fbb..07f2494 100644 --- a/yt_dlp/extractor/pinterest.py +++ b/yt_dlp/extractor/pinterest.py @@ -22,9 +22,9 @@ class PinterestBaseIE(InfoExtractor): def _call_api(self, resource, video_id, options): return self._download_json( - 'https://www.pinterest.com/resource/%sResource/get/' % resource, - video_id, 'Download %s JSON metadata' % resource, query={ - 'data': json.dumps({'options': options}) + f'https://www.pinterest.com/resource/{resource}Resource/get/', + video_id, f'Download {resource} JSON metadata', query={ + 'data': json.dumps({'options': options}), })['resource_response'] def _extract_video(self, data, extract_formats=True): @@ -32,7 +32,7 @@ class PinterestBaseIE(InfoExtractor): thumbnails = [] images = data.get('images') if isinstance(images, dict): - for thumbnail_id, thumbnail in images.items(): + for thumbnail in images.values(): if not isinstance(thumbnail, dict): continue 
thumbnail_url = url_or_none(thumbnail.get('url')) @@ -109,7 +109,7 @@ class PinterestBaseIE(InfoExtractor): class PinterestIE(PinterestBaseIE): - _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE + _VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?P<id>\d+)' _TESTS = [{ # formats found in data['videos'] 'url': 'https://www.pinterest.com/pin/664281013778109217/', @@ -187,7 +187,7 @@ class PinterestIE(PinterestBaseIE): class PinterestCollectionIE(PinterestBaseIE): - _VALID_URL = r'%s/(?P<username>[^/]+)/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE + _VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/(?P<username>[^/]+)/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.pinterest.ca/mashal0407/cool-diys/', 'info_dict': { @@ -207,15 +207,14 @@ class PinterestCollectionIE(PinterestBaseIE): @classmethod def suitable(cls, url): - return False if PinterestIE.suitable(url) else super( - PinterestCollectionIE, cls).suitable(url) + return False if PinterestIE.suitable(url) else super().suitable(url) def _real_extract(self, url): username, slug = self._match_valid_url(url).groups() board = self._call_api( 'Board', slug, { 'slug': slug, - 'username': username + 'username': username, })['data'] board_id = board['id'] options = { diff --git a/yt_dlp/extractor/pixivsketch.py b/yt_dlp/extractor/pixivsketch.py index 850c6f2..344cdb3 100644 --- a/yt_dlp/extractor/pixivsketch.py +++ b/yt_dlp/extractor/pixivsketch.py @@ -81,7 +81,7 @@ class PixivSketchIE(PixivSketchBaseIE): 'channel_id': str(traverse_obj(data, ('user', 'pixiv_user_id'), ('owner', 'user', 'pixiv_user_id'))), 'age_limit': 18 if data.get('is_r18') else 15 if data.get('is_r15') else 0, 'timestamp': unified_timestamp(data.get('created_at')), - 'is_live': True + 'is_live': True, } @@ -101,7 +101,7 @@ class PixivSketchUserIE(PixivSketchBaseIE): @classmethod def suitable(cls, url): - return super(PixivSketchUserIE, cls).suitable(url) and not PixivSketchIE.suitable(url) + return 
super().suitable(url) and not PixivSketchIE.suitable(url) def _real_extract(self, url): user_id = self._match_id(url) diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py index c72a387..f4355d0 100644 --- a/yt_dlp/extractor/pladform.py +++ b/yt_dlp/extractor/pladform.py @@ -39,7 +39,7 @@ class PladformIE(InfoExtractor): 'uploader': 'Comedy Club', 'duration': 367, }, - 'expected_warnings': ['HTTP Error 404: Not Found'] + 'expected_warnings': ['HTTP Error 404: Not Found'], }, { 'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0', 'md5': '53362fac3a27352da20fa2803cc5cd6f', @@ -73,14 +73,14 @@ class PladformIE(InfoExtractor): def fail(text): raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, text), + f'{self.IE_NAME} returned error: {text}', expected=True) if not video: - targetUrl = self._request_webpage(url, video_id, note='Resolving final URL').url - if targetUrl == url: + target_url = self._request_webpage(url, video_id, note='Resolving final URL').url + if target_url == url: raise ExtractorError('Can\'t parse page') - return self.url_result(targetUrl) + return self.url_result(target_url) if video.tag == 'error': fail(video.text) @@ -111,7 +111,7 @@ class PladformIE(InfoExtractor): fail(error) webpage = self._download_webpage( - 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id, + f'http://video.pladform.ru/catalog/video/videoid/{video_id}', video_id) title = self._og_search_title(webpage, fatal=False) or xpath_text( diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py index a4b612a..9486183 100644 --- a/yt_dlp/extractor/planetmarathi.py +++ b/yt_dlp/extractor/planetmarathi.py @@ -24,7 +24,7 @@ class PlanetMarathiIE(InfoExtractor): 'duration': 5539, 'upload_date': '20210829', }, - }] # Trailer skipped + }], # Trailer skipped }, { 'url': 'https://www.planetmarathi.com/titles/baap-beep-baap-season-1', 'playlist_mincount': 10, @@ -43,18 +43,19 @@ class 
PlanetMarathiIE(InfoExtractor): 'duration': 29, 'upload_date': '20210829', }, - }] # Trailers, Episodes, other Character profiles skipped + }], # Trailers, Episodes, other Character profiles skipped }] def _real_extract(self, url): - id = self._match_id(url) + playlist_id = self._match_id(url) entries = [] - json_data = self._download_json(f'https://www.planetmarathi.com/api/v1/titles/{id}/assets', id)['assets'] + json_data = self._download_json( + f'https://www.planetmarathi.com/api/v1/titles/{playlist_id}/assets', playlist_id)['assets'] for asset in json_data: asset_title = asset['mediaAssetName']['en'] if asset_title == 'Movie': - asset_title = id.replace('-', ' ') - asset_id = f'{asset["sk"]}_{id}'.replace('#', '-') + asset_title = playlist_id.replace('-', ' ') + asset_id = f'{asset["sk"]}_{playlist_id}'.replace('#', '-') formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id) entries.append({ 'id': asset_id, @@ -68,4 +69,4 @@ class PlanetMarathiIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, }) - return self.playlist_result(entries, playlist_id=id) + return self.playlist_result(entries, playlist_id=playlist_id) diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py index d978c08..e29f4c2 100644 --- a/yt_dlp/extractor/platzi.py +++ b/yt_dlp/extractor/platzi.py @@ -1,8 +1,6 @@ +import base64 + from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_str, -) from ..utils import ( ExtractorError, clean_html, @@ -48,10 +46,10 @@ class PlatziBaseIE(InfoExtractor): None) for kind in ('error', 'password', 'nonFields'): - error = str_or_none(login.get('%sError' % kind)) + error = str_or_none(login.get(f'{kind}Error')) if error: raise ExtractorError( - 'Unable to login: %s' % error, expected=True) + f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') @@ -120,16 +118,16 @@ class PlatziIE(PlatziBaseIE): 
formats.extend(self._extract_m3u8_formats( format_url, lecture_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=format_id, - note='Downloading %s m3u8 information' % server_id, + note=f'Downloading {server_id} m3u8 information', fatal=False)) elif format_id == 'dash': formats.extend(self._extract_mpd_formats( format_url, lecture_id, mpd_id=format_id, - note='Downloading %s MPD manifest' % server_id, + note=f'Downloading {server_id} MPD manifest', fatal=False)) content = str_or_none(desc.get('content')) - description = (clean_html(compat_b64decode(content).decode('utf-8')) + description = (clean_html(base64.b64decode(content).decode('utf-8')) if content else None) duration = int_or_none(material.get('duration'), invscale=60) @@ -168,7 +166,7 @@ class PlatziCourseIE(PlatziBaseIE): @classmethod def suitable(cls, url): - return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url) + return False if PlatziIE.suitable(url) else super().suitable(url) def _real_extract(self, url): course_name = self._match_id(url) @@ -207,7 +205,7 @@ class PlatziCourseIE(PlatziBaseIE): 'chapter_id': chapter_id, }) - course_id = compat_str(try_get(props, lambda x: x['course']['id'])) - course_title = try_get(props, lambda x: x['course']['name'], compat_str) + course_id = str(try_get(props, lambda x: x['course']['id'])) + course_title = try_get(props, lambda x: x['course']['name'], str) return self.playlist_result(entries, course_id, course_title) diff --git a/yt_dlp/extractor/playsuisse.py b/yt_dlp/extractor/playsuisse.py index 7c5cad1..905f8fc 100644 --- a/yt_dlp/extractor/playsuisse.py +++ b/yt_dlp/extractor/playsuisse.py @@ -36,7 +36,7 @@ class PlaySuisseIE(InfoExtractor): 'episode': 'Knochen', 'episode_number': 1, 'thumbnail': 're:https://playsuisse-img.akamaized.net/', - } + }, }, { # film 'url': 'https://www.playsuisse.ch/watch/808675', @@ -48,7 +48,7 @@ class PlaySuisseIE(InfoExtractor): 'description': 'md5:9f61265c7e6dcc3e046137a792b275fd', 'duration': 5280, 
'thumbnail': 're:https://playsuisse-img.akamaized.net/', - } + }, }, { # series (treated as a playlist) 'url': 'https://www.playsuisse.ch/detail/1115687', @@ -70,7 +70,7 @@ class PlaySuisseIE(InfoExtractor): 'series': 'They all came out to Montreux', 'thumbnail': 're:https://playsuisse-img.akamaized.net/', 'title': 'Folge 1', - 'ext': 'mp4' + 'ext': 'mp4', }, }, { 'info_dict': { @@ -84,7 +84,7 @@ class PlaySuisseIE(InfoExtractor): 'series': 'They all came out to Montreux', 'thumbnail': 're:https://playsuisse-img.akamaized.net/', 'title': 'Folge 2', - 'ext': 'mp4' + 'ext': 'mp4', }, }, { 'info_dict': { @@ -98,10 +98,10 @@ class PlaySuisseIE(InfoExtractor): 'series': 'They all came out to Montreux', 'thumbnail': 're:https://playsuisse-img.akamaized.net/', 'title': 'Folge 3', - 'ext': 'mp4' - } + 'ext': 'mp4', + }, }], - } + }, ] _GRAPHQL_QUERY = ''' @@ -160,7 +160,7 @@ class PlaySuisseIE(InfoExtractor): query=query, headers={'X-CSRF-TOKEN': csrf_token}, data=urlencode_postdata({ 'request_type': 'RESPONSE', 'signInName': username, - 'password': password + 'password': password, }), expected_status=400), ('status', {int_or_none})) if status == 400: raise ExtractorError('Invalid username or password', expected=True) @@ -186,8 +186,8 @@ class PlaySuisseIE(InfoExtractor): media_id, data=json.dumps({ 'operationName': 'AssetWatch', 'query': self._GRAPHQL_QUERY, - 'variables': {'assetId': media_id} - }).encode('utf-8'), + 'variables': {'assetId': media_id}, + }).encode(), headers={'Content-Type': 'application/json', 'locale': 'de'}) return response['data']['assetV2'] diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py index a01b422..12e2fa2 100644 --- a/yt_dlp/extractor/playtvak.py +++ b/yt_dlp/extractor/playtvak.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_urlparse, -) from ..utils import ( ExtractorError, int_or_none, @@ -27,7 +25,7 @@ class 
PlaytvakIE(InfoExtractor): 'timestamp': 1438732860, 'upload_date': '20150805', 'is_live': False, - } + }, }, { # live video test 'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat', 'info_dict': { @@ -64,7 +62,7 @@ class PlaytvakIE(InfoExtractor): 'timestamp': 1438969140, 'upload_date': '20150807', 'is_live': False, - } + }, }, { # lidovky.cz 'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE', 'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8', @@ -77,7 +75,7 @@ class PlaytvakIE(InfoExtractor): 'timestamp': 1439052180, 'upload_date': '20150808', 'is_live': False, - } + }, }, { # metro.cz 'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row', 'md5': '84fc1deedcac37b7d4a6ccae7c716668', @@ -90,7 +88,7 @@ class PlaytvakIE(InfoExtractor): 'timestamp': 1415725500, 'upload_date': '20141111', 'is_live': False, - } + }, }, { 'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko', 'only_matching': True, @@ -104,16 +102,16 @@ class PlaytvakIE(InfoExtractor): info_url = self._html_search_regex( r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url') - parsed_url = compat_urlparse.urlparse(info_url) + parsed_url = urllib.parse.urlparse(info_url) - qs = compat_urlparse.parse_qs(parsed_url.query) + qs = urllib.parse.parse_qs(parsed_url.query) qs.update({ 'reklama': ['0'], 'type': ['js'], }) - info_url = compat_urlparse.urlunparse( - parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) + info_url = urllib.parse.urlunparse( + parsed_url._replace(query=urllib.parse.urlencode(qs, True))) json_info = self._download_json( info_url, video_id, @@ -136,7 +134,7 @@ class PlaytvakIE(InfoExtractor): continue format_ = fmt['format'] - format_id = '%s_%s' % (format_, fmt['quality']) + format_id = '{}_{}'.format(format_, 
fmt['quality']) preference = None if format_ in ('mp4', 'webm'): diff --git a/yt_dlp/extractor/playwire.py b/yt_dlp/extractor/playwire.py index 1057bff..8539a4b 100644 --- a/yt_dlp/extractor/playwire.py +++ b/yt_dlp/extractor/playwire.py @@ -48,7 +48,7 @@ class PlaywireIE(InfoExtractor): publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id') player = self._download_json( - 'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id), + f'http://config.playwire.com/{publisher_id}/videos/v2/{video_id}/zeus.json', video_id) title = player['settings']['title'] diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py index 60c9eff..d3f03f7 100644 --- a/yt_dlp/extractor/pluralsight.py +++ b/yt_dlp/extractor/pluralsight.py @@ -3,12 +3,9 @@ import json import os import random import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..utils import ( ExtractorError, dict_get, @@ -27,7 +24,7 @@ from ..utils import ( class PluralsightBaseIE(InfoExtractor): _API_BASE = 'https://app.pluralsight.com' - _GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE + _GRAPHQL_EP = f'{_API_BASE}/player/api/graphql' _GRAPHQL_HEADERS = { 'Content-Type': 'application/json;charset=UTF-8', } @@ -95,8 +92,8 @@ query BootstrapPlayer { response = self._download_json( self._GRAPHQL_EP, display_id, data=json.dumps({ 'query': self._GRAPHQL_COURSE_TMPL % course_id, - 'variables': {} - }).encode('utf-8'), headers=self._GRAPHQL_HEADERS) + 'variables': {}, + }).encode(), headers=self._GRAPHQL_HEADERS) course = try_get( response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'], @@ -105,7 +102,7 @@ query BootstrapPlayer { return course raise ExtractorError( - '%s said: %s' % (self.IE_NAME, response['error']['message']), + '{} said: {}'.format(self.IE_NAME, response['error']['message']), expected=True) @@ -176,7 +173,7 @@ query viewClip { 'post url', default=self._LOGIN_URL, 
group='url') if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( post_url, None, 'Logging in', @@ -187,7 +184,7 @@ query viewClip { r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>', response, 'error message', default=None) if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError(f'Unable to login: {error}', expected=True) if all(not re.search(p, response) for p in ( r'__INITIAL_STATE__', r'["\']currentUser["\']', @@ -196,13 +193,12 @@ query viewClip { BLOCKED = 'Your account has been blocked due to suspicious activity' if BLOCKED in response: raise ExtractorError( - 'Unable to login: %s' % BLOCKED, expected=True) + f'Unable to login: {BLOCKED}', expected=True) MUST_AGREE = 'To continue using Pluralsight, you must agree to' if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')): raise ExtractorError( - 'Unable to login: %s some documents. Go to pluralsight.com, ' - 'log in and agree with what Pluralsight requires.' - % MUST_AGREE, expected=True) + f'Unable to login: {MUST_AGREE} some documents. 
Go to pluralsight.com, ' + 'log in and agree with what Pluralsight requires.', expected=True) raise ExtractorError('Unable to log in') @@ -210,8 +206,7 @@ query viewClip { captions = None if clip_id: captions = self._download_json( - '%s/transcript/api/v1/caption/json/%s/%s' - % (self._API_BASE, clip_id, lang), video_id, + f'{self._API_BASE}/transcript/api/v1/caption/json/{clip_id}/{lang}', video_id, 'Downloading captions JSON', 'Unable to download captions JSON', fatal=False) if not captions: @@ -222,9 +217,9 @@ query viewClip { 'm': name, } captions = self._download_json( - '%s/player/retrieve-captions' % self._API_BASE, video_id, + f'{self._API_BASE}/player/retrieve-captions', video_id, 'Downloading captions JSON', 'Unable to download captions JSON', - fatal=False, data=json.dumps(captions_post).encode('utf-8'), + fatal=False, data=json.dumps(captions_post).encode(), headers={'Content-Type': 'application/json;charset=utf-8'}) if captions: return { @@ -234,7 +229,7 @@ query viewClip { }, { 'ext': 'srt', 'data': self._convert_subtitles(duration, captions), - }] + }], } @staticmethod @@ -255,10 +250,8 @@ query viewClip { continue srt += os.linesep.join( ( - '%d' % num, - '%s --> %s' % ( - srt_subtitles_timecode(start), - srt_subtitles_timecode(end)), + f'{num}', + f'{srt_subtitles_timecode(start)} --> {srt_subtitles_timecode(end)}', text, os.linesep, )) @@ -272,10 +265,10 @@ query viewClip { clip_idx = qs.get('clip', [None])[0] course_name = qs.get('course', [None])[0] - if any(not f for f in (author, name, clip_idx, course_name,)): + if any(not f for f in (author, name, clip_idx, course_name)): raise ExtractorError('Invalid URL', expected=True) - display_id = '%s-%s' % (name, clip_idx) + display_id = f'{name}-{clip_idx}' course = self._download_course(course_name, url, display_id) @@ -291,7 +284,7 @@ query viewClip { clip_index = clip_.get('index') if clip_index is None: continue - if compat_str(clip_index) == clip_idx: + if str(clip_index) == clip_idx: clip = 
clip_ break @@ -308,14 +301,14 @@ query viewClip { 'high-widescreen': {'width': 1280, 'height': 720}, } - QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',) + QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen') quality_key = qualities(QUALITIES_PREFERENCE) AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities']) ALLOWED_QUALITIES = ( - AllowedQuality('webm', ['high', ]), - AllowedQuality('mp4', ['low', 'medium', 'high', ]), + AllowedQuality('webm', ['high']), + AllowedQuality('mp4', ['low', 'medium', 'high']), ) # Some courses also offer widescreen resolution for high quality (see @@ -359,23 +352,23 @@ query viewClip { 'mediaType': ext, 'quality': '%dx%d' % (f['width'], f['height']), } - format_id = '%s-%s' % (ext, quality) + format_id = f'{ext}-{quality}' try: viewclip = self._download_json( self._GRAPHQL_EP, display_id, - 'Downloading %s viewclip graphql' % format_id, + f'Downloading {format_id} viewclip graphql', data=json.dumps({ 'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post, - 'variables': {} - }).encode('utf-8'), + 'variables': {}, + }).encode(), headers=self._GRAPHQL_HEADERS)['data']['viewClip'] except ExtractorError: # Still works but most likely will go soon viewclip = self._download_json( - '%s/video/clips/viewclip' % self._API_BASE, display_id, - 'Downloading %s viewclip JSON' % format_id, fatal=False, - data=json.dumps(clip_post).encode('utf-8'), + f'{self._API_BASE}/video/clips/viewclip', display_id, + f'Downloading {format_id} viewclip JSON', fatal=False, + data=json.dumps(clip_post).encode(), headers={'Content-Type': 'application/json;charset=utf-8'}) # Pluralsight tracks multiple sequential calls to ViewClip API and start @@ -404,7 +397,7 @@ query viewClip { clip_f.update({ 'url': clip_url, 'ext': ext, - 'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id, + 'format_id': f'{format_id}-{cdn}' if cdn else format_id, 'quality': quality_key(quality), 'source_preference': 
int_or_none(clip_url_data.get('rank')), }) @@ -472,7 +465,7 @@ class PluralsightCourseIE(PluralsightBaseIE): if clip_index is None: continue clip_url = update_url_query( - '%s/player' % self._API_BASE, query={ + f'{self._API_BASE}/player', query={ 'mode': 'live', 'course': course_name, 'author': author, diff --git a/yt_dlp/extractor/plutotv.py b/yt_dlp/extractor/plutotv.py index 5898d92..234ee98 100644 --- a/yt_dlp/extractor/plutotv.py +++ b/yt_dlp/extractor/plutotv.py @@ -1,11 +1,8 @@ import re +import urllib.parse import uuid from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..utils import ( ExtractorError, float_or_none, @@ -31,14 +28,14 @@ class PlutoTVIE(InfoExtractor): _INFO_QUERY_PARAMS = { 'appName': 'web', 'appVersion': 'na', - 'clientID': compat_str(uuid.uuid1()), + 'clientID': str(uuid.uuid1()), 'clientModelNumber': 'na', 'serverSideAds': 'false', 'deviceMake': 'unknown', 'deviceModel': 'web', 'deviceType': 'web', 'deviceVersion': 'unknown', - 'sid': compat_str(uuid.uuid1()), + 'sid': str(uuid.uuid1()), } _TESTS = [ { @@ -54,21 +51,21 @@ class PlutoTVIE(InfoExtractor): 'season_number': 2, 'episode_number': 3, 'duration': 3600, - } + }, }, { 'url': 'https://pluto.tv/on-demand/series/i-love-money/season/1/', 'playlist_count': 11, 'info_dict': { 'id': '5de6c582e9379ae4912dedbd', 'title': 'I Love Money - Season 1', - } + }, }, { 'url': 'https://pluto.tv/on-demand/series/i-love-money/', 'playlist_count': 26, 'info_dict': { 'id': '5de6c582e9379ae4912dedbd', 'title': 'I Love Money', - } + }, }, { 'url': 'https://pluto.tv/on-demand/movies/arrival-2015-1-1', 'md5': '3cead001d317a018bf856a896dee1762', @@ -78,7 +75,7 @@ class PlutoTVIE(InfoExtractor): 'title': 'Arrival', 'description': 'When mysterious spacecraft touch down across the globe, an elite team - led by expert translator Louise Banks (Academy Award® nominee Amy Adams) – races against time to decipher their intent.', 'duration': 9000, - } + }, }, { 'url': 
'https://pluto.tv/en/on-demand/series/manhunters-fugitive-task-force/seasons/1/episode/third-times-the-charm-1-1', 'only_matching': True, @@ -95,8 +92,8 @@ class PlutoTVIE(InfoExtractor): 'title': 'Attack of the Killer Tomatoes', 'description': 'A group of scientists band together to save the world from mutated tomatoes that KILL! (1978)', 'duration': 5700, - } - } + }, + }, ] def _to_ad_free_formats(self, video_id, formats, subtitles): @@ -112,14 +109,14 @@ class PlutoTVIE(InfoExtractor): re.MULTILINE) if first_segment_url: m3u8_urls.add( - compat_urlparse.urljoin(first_segment_url.group(1), '0-end/master.m3u8')) + urllib.parse.urljoin(first_segment_url.group(1), '0-end/master.m3u8')) continue first_segment_url = re.search( r'^(https?://.*/).+\-0+[0-1]0\.ts$', res, re.MULTILINE) if first_segment_url: m3u8_urls.add( - compat_urlparse.urljoin(first_segment_url.group(1), 'master.m3u8')) + urllib.parse.urljoin(first_segment_url.group(1), 'master.m3u8')) continue for m3u8_url in m3u8_urls: diff --git a/yt_dlp/extractor/podbayfm.py b/yt_dlp/extractor/podbayfm.py index 2a26fd2..0141eca 100644 --- a/yt_dlp/extractor/podbayfm.py +++ b/yt_dlp/extractor/podbayfm.py @@ -1,28 +1,40 @@ from .common import InfoExtractor -from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call +from ..utils import ( + OnDemandPagedList, + clean_html, + int_or_none, + jwt_decode_hs256, + url_or_none, +) +from ..utils.traversal import traverse_obj -def result_from_props(props, episode_id=None): +def result_from_props(props): return { - 'id': props.get('podcast_id') or episode_id, - 'title': props.get('title'), - 'url': props['mediaURL'], + **traverse_obj(props, { + 'id': ('_id', {str}), + 'title': ('title', {str}), + 'url': ('mediaURL', {url_or_none}), + 'description': ('description', {clean_html}), + 'thumbnail': ('image', {jwt_decode_hs256}, 'url', {url_or_none}), + 'timestamp': ('timestamp', {int_or_none}), + 'duration': ('duration', {int_or_none}), + }), 'ext': 'mp3', - 
'thumbnail': try_call(lambda: jwt_decode_hs256(props['image'])['url']), - 'timestamp': props.get('timestamp'), - 'duration': int_or_none(props.get('duration')), + 'vcodec': 'none', } class PodbayFMIE(InfoExtractor): - _VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$' + _VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>\d+)' _TESTS = [{ 'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400', - 'md5': '98b41285dcf7989d105a4ed0404054cf', + 'md5': '895ac8505de349515f5ee8a4a3195c93', 'info_dict': { - 'id': '1647338400', + 'id': '62306451f4a48e58d0c4d6a8', 'title': 'Part One: Kissinger', 'ext': 'mp3', + 'description': r're:^We begin our epic six part series on Henry Kissinger.+', 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1647338400, 'duration': 5001, @@ -34,24 +46,25 @@ class PodbayFMIE(InfoExtractor): episode_id = self._match_id(url) webpage = self._download_webpage(url, episode_id) data = self._search_nextjs_data(webpage, episode_id) - return result_from_props(data['props']['pageProps']['episode'], episode_id) + return result_from_props(data['props']['pageProps']['episode']) class PodbayFMChannelIE(InfoExtractor): - _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$' + _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:$|[?#])' _TESTS = [{ 'url': 'https://podbay.fm/p/behind-the-bastards', 'info_dict': { 'id': 'behind-the-bastards', 'title': 'Behind the Bastards', }, + 'playlist_mincount': 21, }] _PAGE_SIZE = 10 def _fetch_page(self, channel_id, pagenum): return self._download_json( f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}', - channel_id)['podcast'] + f'Downloading channel JSON page {pagenum + 1}', channel_id)['podcast'] @staticmethod def _results_from_page(channel_id, page): diff --git a/yt_dlp/extractor/podchaser.py b/yt_dlp/extractor/podchaser.py index fc2d407..4570f0f 100644 --- a/yt_dlp/extractor/podchaser.py +++ b/yt_dlp/extractor/podchaser.py @@ -29,8 +29,8 @@ 
class PodchaserIE(InfoExtractor): 'duration': 3708, 'timestamp': 1636531259, 'upload_date': '20211110', - 'average_rating': 4.0 - } + 'average_rating': 4.0, + }, }, { 'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853', 'info_dict': { @@ -38,15 +38,15 @@ class PodchaserIE(InfoExtractor): 'title': 'The Bone Zone', 'description': 'Podcast by The Bone Zone', }, - 'playlist_count': 275 + 'playlist_count': 275, }, { 'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes', 'info_dict': { 'id': '699349', 'title': 'Sean Carroll\'s Mindscape: Science, Society, Philosophy, Culture, Arts, and Ideas', - 'description': 'md5:2cbd8f4749891a84dc8235342e0b5ff1' + 'description': 'md5:2cbd8f4749891a84dc8235342e0b5ff1', }, - 'playlist_mincount': 225 + 'playlist_mincount': 225, }] @staticmethod @@ -77,9 +77,9 @@ class PodchaserIE(InfoExtractor): 'count': self._PAGE_SIZE, 'sort_order': 'SORT_ORDER_RECENT', 'filters': { - 'podcast_id': podcast_id + 'podcast_id': podcast_id, }, - 'options': {} + 'options': {}, }).encode()) for episode in json_response['entities']: diff --git a/yt_dlp/extractor/podomatic.py b/yt_dlp/extractor/podomatic.py index 37b6869..b5af824 100644 --- a/yt_dlp/extractor/podomatic.py +++ b/yt_dlp/extractor/podomatic.py @@ -26,7 +26,7 @@ class PodomaticIE(InfoExtractor): 'uploader_id': 'scienceteachingtips', 'title': '64. 
When the Moon Hits Your Eye', 'duration': 446, - } + }, }, { 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00', 'md5': 'd2cf443931b6148e27638650e2638297', @@ -37,7 +37,7 @@ class PodomaticIE(InfoExtractor): 'uploader_id': 'ostbahnhof', 'title': 'Einunddreizig', 'duration': 3799, - } + }, }, { 'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00', 'only_matching': True, @@ -48,16 +48,15 @@ class PodomaticIE(InfoExtractor): video_id = mobj.group('id') channel = mobj.group('channel') or mobj.group('channel_2') - json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' - + '?permalink=true&rtmp=0') % - (mobj.group('proto'), channel, video_id)) + json_url = ('{}://{}.podomatic.com/entry/embed_params/{}?permalink=true&rtmp=0'.format( + mobj.group('proto'), channel, video_id)) data_json = self._download_webpage( json_url, video_id, 'Downloading video info') data = json.loads(data_json) video_url = data['downloadLink'] if not video_url: - video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation']) + video_url = '{}/{}'.format(data['streamer'].replace('rtmp', 'http'), data['mediaLocation']) uploader = data['podcast'] title = data['title'] thumbnail = data['imageLocation'] diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py index 0911893..1769684 100644 --- a/yt_dlp/extractor/pokemon.py +++ b/yt_dlp/extractor/pokemon.py @@ -48,7 +48,7 @@ class PokemonIE(InfoExtractor): video_id, display_id = self._match_valid_url(url).groups() webpage = self._download_webpage(url, video_id or display_id) video_data = extract_attributes(self._search_regex( - r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'), + r'(<[^>]+data-video-id="{}"[^>]*>)'.format(video_id if video_id else '[a-z0-9]{32}'), webpage, 'video data element')) video_id = video_data['data-video-id'] title = video_data.get('data-video-title') or self._html_search_meta( @@ -57,7 +57,7 
@@ class PokemonIE(InfoExtractor): return { '_type': 'url_transparent', 'id': video_id, - 'url': 'limelight:media:%s' % video_id, + 'url': f'limelight:media:{video_id}', 'title': title, 'description': video_data.get('data-video-summary'), 'thumbnail': video_data.get('data-video-poster'), @@ -80,13 +80,13 @@ class PokemonWatchIE(InfoExtractor): 'ext': 'mp4', 'title': 'Lillier and the Staff!', 'description': 'md5:338841b8c21b283d24bdc9b568849f04', - } + }, }, { 'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07', - 'only_matching': True + 'only_matching': True, }] def _extract_media(self, channel_array, video_id): @@ -102,7 +102,7 @@ class PokemonWatchIE(InfoExtractor): info = { '_type': 'url', 'id': video_id, - 'url': 'limelight:media:%s' % video_id, + 'url': f'limelight:media:{video_id}', 'ie_key': 'LimelightMedia', } @@ -120,7 +120,7 @@ class PokemonWatchIE(InfoExtractor): if video_data is None: raise ExtractorError( - 'Video %s does not exist' % video_id, expected=True) + f'Video {video_id} does not exist', expected=True) info['_type'] = 'url_transparent' images = video_data.get('images') diff --git a/yt_dlp/extractor/pokergo.py b/yt_dlp/extractor/pokergo.py index 5c7baad..72cbce0 100644 --- a/yt_dlp/extractor/pokergo.py +++ b/yt_dlp/extractor/pokergo.py @@ -5,6 +5,7 @@ from ..utils import ( ExtractorError, try_get, ) +from ..utils.traversal import traverse_obj class PokerGoBaseIE(InfoExtractor): @@ -49,26 +50,27 @@ class PokerGoIE(PokerGoBaseIE): 'episode': 'Episode 2', 'display_id': '2a70ec4e-4a80-414b-97ec-725d9b72a7dc', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] def _real_extract(self, url): - id = self._match_id(url) - data_json = self._download_json(f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/videos/{id}', id, - 
headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data'] + video_id = self._match_id(url) + data_json = self._download_json( + f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/videos/{video_id}', video_id, + headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data'] v_id = data_json['source'] thumbnails = [{ 'url': image['url'], 'id': image.get('label'), 'width': image.get('width'), - 'height': image.get('height') + 'height': image.get('height'), } for image in data_json.get('images') or [] if image.get('url')] - series_json = next(dct for dct in data_json.get('show_tags') or [] if dct.get('video_id') == id) or {} + series_json = traverse_obj(data_json, ('show_tags', lambda _, v: v['video_id'] == video_id, any)) or {} return { '_type': 'url_transparent', - 'display_id': id, + 'display_id': video_id, 'title': data_json.get('title'), 'description': data_json.get('description'), 'duration': data_json.get('duration'), @@ -76,7 +78,7 @@ class PokerGoIE(PokerGoBaseIE): 'season_number': series_json.get('season'), 'episode_number': series_json.get('episode_number'), 'series': try_get(series_json, lambda x: x['tag']['name']), - 'url': f'https://cdn.jwplayer.com/v2/media/{v_id}' + 'url': f'https://cdn.jwplayer.com/v2/media/{v_id}', } @@ -91,9 +93,10 @@ class PokerGoCollectionIE(PokerGoBaseIE): }, }] - def _entries(self, id): - data_json = self._download_json(f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/collections/{id}?include=entities', - id, headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data'] + def _entries(self, playlist_id): + data_json = self._download_json( + f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/collections/{playlist_id}?include=entities', + playlist_id, headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data'] for video in data_json.get('collection_video') or []: video_id = video.get('id') if video_id: @@ -102,5 +105,5 @@ class PokerGoCollectionIE(PokerGoBaseIE): ie=PokerGoIE.ie_key(), 
video_id=video_id) def _real_extract(self, url): - id = self._match_id(url) - return self.playlist_result(self._entries(id), playlist_id=id) + playlist_id = self._match_id(url) + return self.playlist_result(self._entries(playlist_id), playlist_id=playlist_id) diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py index ecf2132..d4a0d6a 100644 --- a/yt_dlp/extractor/polsatgo.py +++ b/yt_dlp/extractor/polsatgo.py @@ -33,7 +33,7 @@ class PolsatGoIE(InfoExtractor): continue yield { 'url': url, - 'height': int_or_none(try_get(source, lambda x: x['quality'][:-1])) + 'height': int_or_none(try_get(source, lambda x: x['quality'][:-1])), } def _real_extract(self, url): @@ -47,7 +47,7 @@ class PolsatGoIE(InfoExtractor): 'id': video_id, 'title': media['displayInfo']['title'], 'formats': formats, - 'age_limit': int_or_none(media['displayInfo']['ageGroup']) + 'age_limit': int_or_none(media['displayInfo']['ageGroup']), } def _call_api(self, endpoint, media_id, method, params): @@ -77,7 +77,7 @@ class PolsatGoIE(InfoExtractor): 'clientId': rand_uuid, 'cpid': 1, }, - }).encode('utf-8'), + }).encode(), headers={'Content-type': 'application/json'}) if not res.get('result'): if res['error']['code'] == 13404: diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py index e0b22ff..6fb21e1 100644 --- a/yt_dlp/extractor/polskieradio.py +++ b/yt_dlp/extractor/polskieradio.py @@ -5,7 +5,6 @@ import re import urllib.parse from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, InAdvancePagedList, @@ -37,7 +36,7 @@ class PolskieRadioBaseExtractor(InfoExtractor): media_urls.add(media_url) entry = base_data.copy() entry.update({ - 'id': compat_str(media['id']), + 'id': str(media['id']), 'url': media_url, 'duration': int_or_none(media.get('length')), 'vcodec': 'none' if media.get('provider') == 'audio' else None, @@ -68,7 +67,7 @@ class PolskieRadioLegacyIE(PolskieRadioBaseExtractor): 'timestamp': 
1592654400, 'upload_date': '20200620', 'duration': 1430, - 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' + 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$', }, }], }, { @@ -328,14 +327,14 @@ class PolskieRadioCategoryIE(InfoExtractor): 'id': '4143', 'title': 'Kierunek Kraków', }, - 'playlist_mincount': 61 + 'playlist_mincount': 61, }, { 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka', 'info_dict': { 'id': '214', 'title': 'Muzyka', }, - 'playlist_mincount': 61 + 'playlist_mincount': 61, }, { # billennium tabs 'url': 'https://www.polskieradio.pl/8/2385', @@ -400,7 +399,7 @@ class PolskieRadioCategoryIE(InfoExtractor): params = self._search_json( r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+onclick=["\']TB_LoadTab\(', pagination, 'next page params', category_id, default=None, close_objects=1, - contains_pattern='.+', transform_source=lambda x: '[%s' % js_to_json(unescapeHTML(x))) + contains_pattern='.+', transform_source=lambda x: f'[{js_to_json(unescapeHTML(x))}') if not params: break tab_content = self._download_json( @@ -409,7 +408,7 @@ class PolskieRadioCategoryIE(InfoExtractor): data=json.dumps(dict(zip(( 'boxInstanceId', 'tabId', 'categoryType', 'sectionId', 'categoryId', 'pagerMode', 'subjectIds', 'tagIndexId', 'queryString', 'name', 'openArticlesInParentTemplate', - 'idSectionFromUrl', 'maxDocumentAge', 'showCategoryForArticle', 'pageNumber' + 'idSectionFromUrl', 'maxDocumentAge', 'showCategoryForArticle', 'pageNumber', ), params))).encode())['d'] content, pagination = tab_content['Content'], tab_content.get('PagerContent') elif is_post_back: @@ -511,7 +510,7 @@ class PolskieRadioPlayerIE(InfoExtractor): }) return { - 'id': compat_str(channel['id']), + 'id': str(channel['id']), 'formats': formats, 'title': channel.get('name') or channel.get('streamName'), 'display_id': channel_url, @@ -603,7 +602,7 @@ class PolskieRadioPodcastIE(PolskieRadioPodcastBaseExtractor): podcast_id, 'Downloading podcast metadata', 
data=json.dumps({ 'guids': [podcast_id], - }).encode('utf-8'), + }).encode(), headers={ 'Content-Type': 'application/json', }) diff --git a/yt_dlp/extractor/popcorntimes.py b/yt_dlp/extractor/popcorntimes.py index ddc5ec8..cfece86 100644 --- a/yt_dlp/extractor/popcorntimes.py +++ b/yt_dlp/extractor/popcorntimes.py @@ -1,5 +1,6 @@ +import base64 + from .common import InfoExtractor -from ..compat import compat_b64decode from ..utils import int_or_none @@ -49,7 +50,7 @@ class PopcorntimesIE(InfoExtractor): c_ord -= 26 loc_b64 += chr(c_ord) - video_url = compat_b64decode(loc_b64).decode('utf-8') + video_url = base64.b64decode(loc_b64).decode('utf-8') description = self._html_search_regex( r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage, diff --git a/yt_dlp/extractor/popcorntv.py b/yt_dlp/extractor/popcorntv.py index 7798462..2897bb4 100644 --- a/yt_dlp/extractor/popcorntv.py +++ b/yt_dlp/extractor/popcorntv.py @@ -37,7 +37,7 @@ class PopcornTVIE(InfoExtractor): m3u8_url = extract_attributes( self._search_regex( r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)', - webpage, 'content' + webpage, 'content', ))['href'] formats = self._extract_m3u8_formats( diff --git a/yt_dlp/extractor/pornbox.py b/yt_dlp/extractor/pornbox.py index c381382..e15244d 100644 --- a/yt_dlp/extractor/pornbox.py +++ b/yt_dlp/extractor/pornbox.py @@ -29,8 +29,8 @@ class PornboxIE(InfoExtractor): 'cast': ['Lily Strong', 'John Strong'], 'tags': 'count:11', 'description': 'md5:589c7f33e183aa8aa939537300efb859', - 'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$' - } + 'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$', + }, }, { 'url': 'https://pornbox.com/application/watch-page/216045', 'info_dict': { @@ -46,15 +46,15 @@ class PornboxIE(InfoExtractor): 'cast': 'count:3', 'tags': 'count:29', 'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$', - 'subtitles': 'count:6' + 'subtitles': 'count:6', }, 'params': { 'skip_download': True, - 
'ignore_no_formats_error': True + 'ignore_no_formats_error': True, }, 'expected_warnings': [ 'You are either not logged in or do not have access to this scene', - 'No video formats found', 'Requested format is not available'] + 'No video formats found', 'Requested format is not available'], }] def _real_extract(self, url): @@ -64,7 +64,7 @@ class PornboxIE(InfoExtractor): subtitles = {country_code: [{ 'url': f'https://pornbox.com/contents/{video_id}/subtitles/{country_code}', - 'ext': 'srt' + 'ext': 'srt', }] for country_code in traverse_obj(public_data, ('subtitles', ..., {str}))} is_free_scene = traverse_obj( diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py index d711d3e..bc684fd 100644 --- a/yt_dlp/extractor/pornflip.py +++ b/yt_dlp/extractor/pornflip.py @@ -43,7 +43,7 @@ class PornFlipIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://{}/sv/{}'.format(self._HOST, video_id), video_id, headers={'host': self._HOST}) + f'https://{self._HOST}/sv/{video_id}', video_id, headers={'host': self._HOST}) description = self._html_search_regex(r'&p\[summary\]=(.*?)\s*&p', webpage, 'description', fatal=False) duration = self._search_regex(r'"duration":\s+"([^"]+)",', webpage, 'duration', fatal=False) view_count = self._search_regex(r'"interactionCount":\s+"([^"]+)"', webpage, 'view_count', fatal=False) diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index d94f28c..679dc63 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -6,7 +6,6 @@ import re from .common import InfoExtractor from .openload import PhantomJSwrapper -from ..compat import compat_str from ..networking import Request from ..networking.exceptions import HTTPError from ..utils import ( @@ -81,9 +80,9 @@ class PornHubBaseIE(InfoExtractor): if username is None: return - login_url = 'https://www.%s/%slogin' % (host, 'premium/' if 'premium' in host else '') + login_url = 
'https://www.{}/{}login'.format(host, 'premium/' if 'premium' in host else '') login_page = self._download_webpage( - login_url, None, 'Downloading %s login page' % site) + login_url, None, f'Downloading {site} login page') def is_logged(webpage): return any(re.search(p, webpage) for p in ( @@ -102,8 +101,8 @@ class PornHubBaseIE(InfoExtractor): }) response = self._download_json( - 'https://www.%s/front/authenticate' % host, None, - 'Logging in to %s' % site, + f'https://www.{host}/front/authenticate', None, + f'Logging in to {site}', data=urlencode_postdata(login_form), headers={ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', @@ -118,23 +117,23 @@ class PornHubBaseIE(InfoExtractor): message = response.get('message') if message is not None: raise ExtractorError( - 'Unable to login: %s' % message, expected=True) + f'Unable to login: {message}', expected=True) raise ExtractorError('Unable to log in') class PornHubIE(PornHubBaseIE): IE_DESC = 'PornHub and Thumbzilla' - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) https?:// (?: (?:[^/]+\.)? 
- %s + {PornHubBaseIE._PORNHUB_HOST_RE} /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) (?P<id>[\da-z]+) - ''' % PornHubBaseIE._PORNHUB_HOST_RE + ''' _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)'] _TESTS = [{ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', @@ -196,8 +195,8 @@ class PornHubIE(PornHubBaseIE): 'categories': list, 'subtitles': { 'en': [{ - "ext": 'srt' - }] + 'ext': 'srt', + }], }, }, 'params': { @@ -266,7 +265,7 @@ class PornHubIE(PornHubBaseIE): }] def _extract_count(self, pattern, webpage, name): - return str_to_int(self._search_regex(pattern, webpage, '%s count' % name, default=None)) + return str_to_int(self._search_regex(pattern, webpage, f'{name} count', default=None)) def _real_extract(self, url): mobj = self._match_valid_url(url) @@ -279,8 +278,8 @@ class PornHubIE(PornHubBaseIE): def dl_webpage(platform): self._set_cookie(host, 'platform', platform) return self._download_webpage( - 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id), - video_id, 'Downloading %s webpage' % platform) + f'https://www.{host}/view_video.php?viewkey={video_id}', + video_id, f'Downloading {platform} webpage') webpage = dl_webpage('pc') @@ -291,7 +290,7 @@ class PornHubIE(PornHubBaseIE): if error_msg: error_msg = re.sub(r'\s+', ' ', error_msg) raise ExtractorError( - 'PornHub said: %s' % error_msg, + f'PornHub said: {error_msg}', expected=True, video_id=video_id) if any(re.search(p, webpage) for p in ( @@ -332,7 +331,7 @@ class PornHubIE(PornHubBaseIE): if not isinstance(definition, dict): continue video_url = definition.get('videoUrl') - if not video_url or not isinstance(video_url, compat_str): + if not video_url or not isinstance(video_url, str): continue if video_url in video_urls_set: continue @@ -392,7 +391,7 @@ class PornHubIE(PornHubBaseIE): if not video_urls: FORMAT_PREFIXES = ('media', 'quality', 
'qualityItems') js_vars = extract_js_vars( - webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES), + webpage, r'(var\s+(?:{})_.+)'.format('|'.join(FORMAT_PREFIXES)), default=None) if js_vars: for key, format_url in js_vars.items(): @@ -403,7 +402,7 @@ class PornHubIE(PornHubBaseIE): if not video_urls and re.search( r'<[^>]+\bid=["\']lockedPlayer', webpage): raise ExtractorError( - 'Video %s is locked' % video_id, expected=True) + f'Video {video_id} is locked', expected=True) if not video_urls: js_vars = extract_js_vars( @@ -470,8 +469,8 @@ class PornHubIE(PornHubBaseIE): def extract_vote_count(kind, name): return self._extract_count( - (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind, - r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind), + (rf'<span[^>]+\bclass="votes{kind}"[^>]*>([\d,\.]+)</span>', + rf'<span[^>]+\bclass=["\']votes{kind}["\'][^>]*\bdata-rating=["\'](\d+)'), webpage, name) view_count = self._extract_count( @@ -483,8 +482,8 @@ class PornHubIE(PornHubBaseIE): def extract_list(meta_key): div = self._search_regex( - r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>' - % meta_key, webpage, meta_key, default=None) + rf'(?s)<div[^>]+\bclass=["\'].*?\b{meta_key}Wrapper[^>]*>(.+?)</div>', + webpage, meta_key, default=None) if div: return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)] @@ -528,7 +527,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): return [ self.url_result( - 'http://www.%s/%s' % (host, video_url), + f'http://www.{host}/{video_url}', PornHubIE.ie_key(), video_title=title) for video_url, title in orderedSet(re.findall( r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', @@ -537,7 +536,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE): class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % 
PornHubBaseIE._PORNHUB_HOST_RE + _VALID_URL = rf'(?P<url>https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, @@ -574,7 +573,7 @@ class PornHubUserIE(PornHubPlaylistBaseIE): def _real_extract(self, url): mobj = self._match_valid_url(url) user_id = mobj.group('id') - videos_url = '%s/videos' % mobj.group('url') + videos_url = '{}/videos'.format(mobj.group('url')) self._set_age_cookies(mobj.group('host')) page = self._extract_page(url) if page: @@ -599,7 +598,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): VIDEOS = '/videos' def download_page(base_url, num, fallback=False): - note = 'Downloading page %d%s' % (num, ' (switch to fallback)' if fallback else '') + note = 'Downloading page {}{}'.format(num, ' (switch to fallback)' if fallback else '') return self._download_webpage( base_url, item_id, note, query={'page': num}) @@ -646,7 +645,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?!playlist/)(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE + _VALID_URL = rf'https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?!playlist/)(?P<id>(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, @@ -748,11 +747,11 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): def suitable(cls, url): return (False if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) - else super(PornHubPagedVideoListIE, cls).suitable(url)) + else super().suitable(url)) class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE + _VALID_URL = 
rf'(?P<url>https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' _TESTS = [{ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'info_dict': { @@ -769,7 +768,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): class PornHubPlaylistIE(PornHubPlaylistBaseIE): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/playlist/(?P<id>[^/?#&]+))' % PornHubBaseIE._PORNHUB_HOST_RE + _VALID_URL = rf'(?P<url>https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/playlist/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.pornhub.com/playlist/44121572', 'info_dict': { @@ -797,8 +796,8 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): page_entries = self._extract_entries(webpage, host) def download_page(page_num): - note = 'Downloading page {}'.format(page_num) - page_url = 'https://www.{}/playlist/viewChunked'.format(host) + note = f'Downloading page {page_num}' + page_url = f'https://www.{host}/playlist/viewChunked' return self._download_webpage(page_url, item_id, note, query={ 'id': playlist_id, 'page': page_num, @@ -811,8 +810,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE): page_entries = self._extract_entries(webpage, host) if not page_entries: break - for e in page_entries: - yield e + yield from page_entries def _real_extract(self, url): mobj = self._match_valid_url(url) diff --git a/yt_dlp/extractor/pornotube.py b/yt_dlp/extractor/pornotube.py index e0960f4..80c9b27 100644 --- a/yt_dlp/extractor/pornotube.py +++ b/yt_dlp/extractor/pornotube.py @@ -20,7 +20,7 @@ class PornotubeIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1417582800, 'age_limit': 18, - } + }, } def _real_extract(self, url): @@ -29,25 +29,24 @@ class PornotubeIE(InfoExtractor): token = self._download_json( 'https://api.aebn.net/auth/v2/origins/authenticate', video_id, note='Downloading token', - data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'), + 
data=json.dumps({'credentials': 'Clip Application'}).encode(), headers={ 'Content-Type': 'application/json', 'Origin': 'http://www.pornotube.com', })['tokenKey'] video_url = self._download_json( - 'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id, + f'https://api.aebn.net/delivery/v1/clips/{video_id}/MP4', video_id, note='Downloading delivery information', headers={'Authorization': token})['mediaUrl'] FIELDS = ( 'title', 'description', 'startSecond', 'endSecond', 'publishDate', - 'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber' + 'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber', ) info = self._download_json( - 'https://api.aebn.net/content/v2/clips/%s?fields=%s' - % (video_id, ','.join(FIELDS)), video_id, + 'https://api.aebn.net/content/v2/clips/{}?fields={}'.format(video_id, ','.join(FIELDS)), video_id, note='Downloading metadata', headers={'Authorization': token}) diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py index b8e8701..587b3cd 100644 --- a/yt_dlp/extractor/pornovoisines.py +++ b/yt_dlp/extractor/pornovoisines.py @@ -29,9 +29,9 @@ class PornoVoisinesIE(InfoExtractor): 'subtitles': { 'fr': [{ 'ext': 'vtt', - }] + }], }, - } + }, } def _real_extract(self, url): @@ -40,7 +40,7 @@ class PornoVoisinesIE(InfoExtractor): display_id = mobj.group('display_id') settings_url = self._download_json( - 'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id, + f'http://www.pornovoisines.com/api/video/{video_id}/getsettingsurl/', video_id, note='Getting settings URL')['video_settings_url'] settings = self._download_json(settings_url, video_id)['data'] diff --git a/yt_dlp/extractor/pornoxo.py b/yt_dlp/extractor/pornoxo.py index 049feb4..fa31546 100644 --- a/yt_dlp/extractor/pornoxo.py +++ b/yt_dlp/extractor/pornoxo.py @@ -19,7 +19,7 @@ class PornoXOIE(InfoExtractor): 'categories': list, # NSFW 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, - } + }, } def 
_real_extract(self, url): diff --git a/yt_dlp/extractor/pr0gramm.py b/yt_dlp/extractor/pr0gramm.py index 3e0ccba..b0d6475 100644 --- a/yt_dlp/extractor/pr0gramm.py +++ b/yt_dlp/extractor/pr0gramm.py @@ -1,9 +1,9 @@ import datetime as dt +import functools import json import urllib.parse from .common import InfoExtractor -from ..compat import functools from ..utils import ( ExtractorError, float_or_none, @@ -198,6 +198,6 @@ class Pr0grammIE(InfoExtractor): 'dislike_count': ('down', {int}), 'timestamp': ('created', {int}), 'upload_date': ('created', {int}, {dt.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}), - 'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}) + 'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}), }), } diff --git a/yt_dlp/extractor/prankcast.py b/yt_dlp/extractor/prankcast.py index 56cd40d..84e6f7e 100644 --- a/yt_dlp/extractor/prankcast.py +++ b/yt_dlp/extractor/prankcast.py @@ -22,8 +22,8 @@ class PrankCastIE(InfoExtractor): 'description': '', 'categories': ['prank'], 'tags': ['prank call', 'prank', 'live show'], - 'upload_date': '20220825' - } + 'upload_date': '20220825', + }, }, { 'url': 'https://prankcast.com/phonelosers/showreel/2048-NOT-COOL', 'info_dict': { @@ -39,8 +39,8 @@ class PrankCastIE(InfoExtractor): 'description': '', 'categories': ['prank'], 'tags': ['prank call', 'prank', 'live show'], - 'upload_date': '20221006' - } + 'upload_date': '20221006', + }, }] def _real_extract(self, url): @@ -62,10 +62,10 @@ class PrankCastIE(InfoExtractor): 'uploader': uploader, 'channel_id': str_or_none(json_info.get('user_id')), 'duration': try_call(lambda: parse_iso8601(json_info['end_date']) - start_date), - 'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))), + 'cast': list(filter(None, [uploader, *traverse_obj(guests_json, (..., 'name'))])), 'description': json_info.get('broadcast_description'), 'categories': [json_info.get('broadcast_category')], - 'tags': 
try_call(lambda: json_info['broadcast_tags'].split(',')) + 'tags': try_call(lambda: json_info['broadcast_tags'].split(',')), } @@ -85,8 +85,8 @@ class PrankCastPostIE(InfoExtractor): 'cast': ['Devonanustart'], 'description': '', 'categories': ['prank call'], - 'upload_date': '20240104' - } + 'upload_date': '20240104', + }, }, { 'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-', 'info_dict': { @@ -101,8 +101,8 @@ class PrankCastPostIE(InfoExtractor): 'cast': ['despicabledogs'], 'description': 'https://imgur.com/a/vtxLvKU', 'categories': [], - 'upload_date': '20240104' - } + 'upload_date': '20240104', + }, }] def _real_extract(self, url): @@ -124,7 +124,7 @@ class PrankCastPostIE(InfoExtractor): 'uploader': uploader, 'channel_id': str_or_none(post.get('user_id')), 'duration': float_or_none(content.get('duration')), - 'cast': list(filter(None, [uploader] + traverse_obj(guests_json, (..., 'name')))), + 'cast': list(filter(None, [uploader, *traverse_obj(guests_json, (..., 'name'))])), 'description': post.get('post_body'), 'categories': list(filter(None, [content.get('category')])), 'tags': try_call(lambda: list(filter('', post['post_tags'].split(',')))), @@ -133,5 +133,5 @@ class PrankCastPostIE(InfoExtractor): 'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=', 'ext': 'json', }], - } if post.get('content_id') else None + } if post.get('content_id') else None, } diff --git a/yt_dlp/extractor/premiershiprugby.py b/yt_dlp/extractor/premiershiprugby.py index 67d41fd..313b4d2 100644 --- a/yt_dlp/extractor/premiershiprugby.py +++ b/yt_dlp/extractor/premiershiprugby.py @@ -14,7 +14,7 @@ class PremiershipRugbyIE(InfoExtractor): 'duration': 6093.0, 'tags': ['video'], 'categories': ['Full Match', 'Harlequins', 'Newcastle Falcons', 'gallaher premiership'], - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/presstv.py b/yt_dlp/extractor/presstv.py index 26ce74a..30eb64b 100644 --- 
a/yt_dlp/extractor/presstv.py +++ b/yt_dlp/extractor/presstv.py @@ -15,8 +15,8 @@ class PressTVIE(InfoExtractor): 'title': 'Organic mattresses used to clean waste water', 'upload_date': '20160409', 'thumbnail': r're:^https?://.*\.jpg', - 'description': 'md5:20002e654bbafb6908395a5c0cfcd125' - } + 'description': 'md5:20002e654bbafb6908395a5c0cfcd125', + }, } def _real_extract(self, url): @@ -36,12 +36,12 @@ class PressTVIE(InfoExtractor): (180, '_low200.mp4'), (360, '_low400.mp4'), (720, '_low800.mp4'), - (1080, '.mp4') + (1080, '.mp4'), ] formats = [{ 'url': base_url + video_url[:-4] + extension, - 'format_id': '%dp' % height, + 'format_id': f'{height}p', 'height': height, } for height, extension in _formats] @@ -65,5 +65,5 @@ class PressTVIE(InfoExtractor): 'formats': formats, 'thumbnail': thumbnail, 'upload_date': upload_date, - 'description': description + 'description': description, } diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py index daf1405..6f42485 100644 --- a/yt_dlp/extractor/projectveritas.py +++ b/yt_dlp/extractor/projectveritas.py @@ -17,7 +17,7 @@ class ProjectVeritasIE(InfoExtractor): 'title': 'Exclusive: Inside The New York and New Jersey Hospitals Battling Coronavirus', 'upload_date': '20200327', 'thumbnail': 'md5:6076477fe50b03eb8708be9415e18e1c', - } + }, }, { 'url': 'https://www.projectveritas.com/video/ilhan-omar-connected-ballot-harvester-in-cash-for-ballots-scheme-car-is-full/', 'info_dict': { @@ -26,13 +26,13 @@ class ProjectVeritasIE(InfoExtractor): 'title': 'Ilhan Omar connected Ballot Harvester in cash-for-ballots scheme: "Car is full" of absentee ballots', 'upload_date': '20200927', 'thumbnail': 'md5:194b8edf0e2ba64f25500ff4378369a4', - } + }, }] def _real_extract(self, url): - id, type = self._match_valid_url(url).group('id', 'type') - api_url = f'https://www.projectveritas.com/page-data/{type}/{id}/page-data.json' - data_json = self._download_json(api_url, id)['result']['data'] + video_id, 
video_type = self._match_valid_url(url).group('id', 'type') + api_url = f'https://www.projectveritas.com/page-data/{video_type}/{video_id}/page-data.json' + data_json = self._download_json(api_url, video_id)['result']['data'] main_data = traverse_obj(data_json, 'video', 'post') video_id = main_data['id'] thumbnail = traverse_obj(main_data, ('image', 'ogImage', 'src')) diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py index 4c33bae..e8a4712 100644 --- a/yt_dlp/extractor/prosiebensat1.py +++ b/yt_dlp/extractor/prosiebensat1.py @@ -2,12 +2,12 @@ import hashlib import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, determine_ext, float_or_none, int_or_none, + join_nonempty, merge_dicts, unified_strdate, ) @@ -75,12 +75,12 @@ class ProSiebenSat1BaseIE(InfoExtractor): 'format_id': protocol, }) if not formats: - source_ids = [compat_str(source['id']) for source in video['sources']] + source_ids = [str(source['id']) for source in video['sources']] - client_id = self._SALT[:2] + hashlib.sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + client_id = self._SALT[:2] + hashlib.sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode()).hexdigest() sources = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, + f'http://vas.sim-technik.de/vas/live/v2/videos/{clip_id}/sources', clip_id, 'Downloading sources JSON', query={ 'access_token': self._TOKEN, 'client_id': client_id, @@ -96,9 +96,9 @@ class ProSiebenSat1BaseIE(InfoExtractor): return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate for source_id in source_ids: - client_id = self._SALT[:2] + hashlib.sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + client_id = 
self._SALT[:2] + hashlib.sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode()).hexdigest() urls = self._download_json( - 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, + f'http://vas.sim-technik.de/vas/live/v2/videos/{clip_id}/sources/url', clip_id, 'Downloading urls JSON', fatal=False, query={ 'access_token': self._TOKEN, 'client_id': client_id, @@ -141,20 +141,20 @@ class ProSiebenSat1BaseIE(InfoExtractor): app = path[:mp4colon_index] play_path = path[mp4colon_index:] formats.append({ - 'url': '%s/%s' % (mobj.group('url'), app), + 'url': '{}/{}'.format(mobj.group('url'), app), 'app': app, 'play_path': play_path, 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', 'page_url': 'http://www.prosieben.de', 'tbr': tbr, 'ext': 'flv', - 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), + 'format_id': join_nonempty('rtmp', tbr), }) else: formats.append({ 'url': source_url, 'tbr': tbr, - 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), + 'format_id': join_nonempty('http', tbr), }) return { @@ -493,4 +493,4 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE): return self._extract_playlist(url, webpage) else: raise ExtractorError( - 'Unsupported page type %s' % page_type, expected=True) + f'Unsupported page type {page_type}', expected=True) diff --git a/yt_dlp/extractor/prx.py b/yt_dlp/extractor/prx.py index 338794e..742479c 100644 --- a/yt_dlp/extractor/prx.py +++ b/yt_dlp/extractor/prx.py @@ -38,7 +38,7 @@ class PRXBaseIE(InfoExtractor): 'filesize': image_response.get('size'), 'width': image_response.get('width'), 'height': image_response.get('height'), - 'url': cls._extract_file_link(image_response) + 'url': cls._extract_file_link(image_response), } @classmethod @@ -63,7 +63,7 @@ class PRXBaseIE(InfoExtractor): 'duration': int_or_none(response.get('duration')), 'tags': response.get('tags'), 'episode_number': 
int_or_none(response.get('episodeIdentifier')), - 'season_number': int_or_none(response.get('seasonIdentifier')) + 'season_number': int_or_none(response.get('seasonIdentifier')), } @classmethod @@ -92,7 +92,7 @@ class PRXBaseIE(InfoExtractor): **base_info, 'title': name, 'channel_id': base_info.get('id'), - 'channel_url': 'https://beta.prx.org/accounts/%s' % base_info.get('id'), + 'channel_url': 'https://beta.prx.org/accounts/{}'.format(base_info.get('id')), 'channel': name, } @@ -111,7 +111,7 @@ class PRXBaseIE(InfoExtractor): 'series_id': series.get('series_id'), 'channel_id': account.get('channel_id'), 'channel_url': account.get('channel_url'), - 'channel': account.get('channel') + 'channel': account.get('channel'), } def _entries(self, item_id, endpoint, entry_func, query=None): @@ -124,7 +124,7 @@ class PRXBaseIE(InfoExtractor): response = self._call_api(f'{item_id}: page {page}', endpoint, query={ **(query or {}), 'page': page, - 'per': 100 + 'per': 100, }) items = self._get_prx_embed_response(response, 'items') if not response or not items: @@ -142,8 +142,8 @@ class PRXBaseIE(InfoExtractor): return story.update({ '_type': 'url', - 'url': 'https://beta.prx.org/stories/%s' % story['id'], - 'ie_key': PRXStoryIE.ie_key() + 'url': 'https://beta.prx.org/stories/{}'.format(story['id']), + 'ie_key': PRXStoryIE.ie_key(), }) return story @@ -153,8 +153,8 @@ class PRXBaseIE(InfoExtractor): return series.update({ '_type': 'url', - 'url': 'https://beta.prx.org/series/%s' % series['id'], - 'ie_key': PRXSeriesIE.ie_key() + 'url': 'https://beta.prx.org/series/{}'.format(series['id']), + 'ie_key': PRXSeriesIE.ie_key(), }) return series @@ -205,8 +205,8 @@ class PRXStoryIE(PRXBaseIE): 'episode': 'Episode 8', 'release_date': '20211223', 'season': 'Season 5', - 'modified_date': '20220104' - } + 'modified_date': '20220104', + }, }, { 'info_dict': { 'id': '399200_part2', @@ -229,11 +229,11 @@ class PRXStoryIE(PRXBaseIE): 'episode': 'Episode 8', 'release_date': '20211223', 
'season': 'Season 5', - 'modified_date': '20220104' - } - } + 'modified_date': '20220104', + }, + }, - ] + ], }, { # Story with only split audio 'url': 'https://beta.prx.org/stories/326414', @@ -251,7 +251,7 @@ class PRXStoryIE(PRXBaseIE): 'channel_url': 'https://beta.prx.org/accounts/206', 'channel': 'New Hampshire Public Radio', }, - 'playlist_count': 4 + 'playlist_count': 4, }, { # Story with single combined audio 'url': 'https://beta.prx.org/stories/400404', @@ -272,12 +272,12 @@ class PRXStoryIE(PRXBaseIE): 'tags': 'count:0', 'thumbnail': r're:https?://cms\.prx\.org/pub/\w+/0/web/story_image/767965/medium/Aurora_Over_Trees\.jpg', 'upload_date': '20220103', - 'modified_date': '20220103' - } + 'modified_date': '20220103', + }, }, { 'url': 'https://listen.prx.org/stories/399200', - 'only_matching': True - } + 'only_matching': True, + }, ] def _extract_audio_pieces(self, audio_response): @@ -290,7 +290,7 @@ class PRXStoryIE(PRXBaseIE): 'asr': int_or_none(piece_response.get('frequency'), scale=1000), 'abr': int_or_none(piece_response.get('bitRate')), 'url': self._extract_file_link(piece_response), - 'vcodec': 'none' + 'vcodec': 'none', } for piece_response in sorted( self._get_prx_embed_response(audio_response, 'items') or [], key=lambda p: int_or_none(p.get('position')))] @@ -304,18 +304,18 @@ class PRXStoryIE(PRXBaseIE): if len(audio_pieces) == 1: return { 'formats': audio_pieces, - **info + **info, } entries = [{ **info, - 'id': '%s_part%d' % (info['id'], (idx + 1)), + 'id': '{}_part{}'.format(info['id'], (idx + 1)), 'formats': [fmt], } for idx, fmt in enumerate(audio_pieces)] return { '_type': 'multi_video', 'entries': entries, - **info + **info, } def _real_extract(self, url): @@ -340,9 +340,9 @@ class PRXSeriesIE(PRXBaseIE): 'channel_url': 'https://beta.prx.org/accounts/206', 'channel': 'New Hampshire Public Radio', 'series': 'Outside/In', - 'series_id': '36252' + 'series_id': '36252', }, - 'playlist_mincount': 39 + 'playlist_mincount': 39, }, { # Blank 
series 'url': 'https://beta.prx.org/series/25038', @@ -355,18 +355,18 @@ class PRXSeriesIE(PRXBaseIE): 'channel_url': 'https://beta.prx.org/accounts/206', 'channel': 'New Hampshire Public Radio', 'series': '25038', - 'series_id': '25038' + 'series_id': '25038', }, - 'playlist_count': 0 - } + 'playlist_count': 0, + }, ] def _extract_series(self, series_response): info = self._extract_series_info(series_response) return { '_type': 'playlist', - 'entries': self._entries(info['id'], 'series/%s/stories' % info['id'], self._story_playlist_entry), - **info + 'entries': self._entries(info['id'], 'series/{}/stories'.format(info['id']), self._story_playlist_entry), + **info, } def _real_extract(self, url): @@ -386,9 +386,9 @@ class PRXAccountIE(PRXBaseIE): 'channel_id': '206', 'channel_url': 'https://beta.prx.org/accounts/206', 'channel': 'New Hampshire Public Radio', - 'thumbnails': 'count:1' + 'thumbnails': 'count:1', }, - 'playlist_mincount': 380 + 'playlist_mincount': 380, }] def _extract_account(self, account_response): @@ -400,7 +400,7 @@ class PRXAccountIE(PRXBaseIE): return { '_type': 'playlist', 'entries': itertools.chain(series, stories), - **info + **info, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py index fc4c29e..b62050e 100644 --- a/yt_dlp/extractor/puhutv.py +++ b/yt_dlp/extractor/puhutv.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -48,25 +47,25 @@ class PuhuTVIE(InfoExtractor): _SUBTITLE_LANGS = { 'English': 'en', 'Deutsch': 'de', - 'عربى': 'ar' + 'عربى': 'ar', } def _real_extract(self, url): display_id = self._match_id(url) info = self._download_json( - urljoin(url, '/api/slug/%s-izle' % display_id), + urljoin(url, f'/api/slug/{display_id}-izle'), display_id)['data'] - video_id = compat_str(info['id']) + video_id = str(info['id']) show = info.get('title') or {} title = 
info.get('name') or show['name'] if info.get('display_name'): - title = '%s %s' % (title, info['display_name']) + title = '{} {}'.format(title, info['display_name']) try: videos = self._download_json( - 'https://puhutv.com/api/assets/%s/videos' % video_id, + f'https://puhutv.com/api/assets/{video_id}/videos', display_id, 'Downloading video JSON', headers=self.geo_verification_headers()) except ExtractorError as e: @@ -94,7 +93,7 @@ class PuhuTVIE(InfoExtractor): f = { 'url': media_url, 'ext': 'mp4', - 'height': quality + 'height': quality, } video_format = video.get('video_format') is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False @@ -106,12 +105,12 @@ class PuhuTVIE(InfoExtractor): else: continue if quality: - format_id += '-%sp' % quality + format_id += f'-{quality}p' f['format_id'] = format_id formats.append(f) creator = try_get( - show, lambda x: x['producer']['name'], compat_str) + show, lambda x: x['producer']['name'], str) content = info.get('content') or {} @@ -119,14 +118,14 @@ class PuhuTVIE(InfoExtractor): content, lambda x: x['images']['wide'], dict) or {} thumbnails = [] for image_id, image_url in images.items(): - if not isinstance(image_url, compat_str): + if not isinstance(image_url, str): continue if not image_url.startswith(('http', '//')): - image_url = 'https://%s' % image_url + image_url = f'https://{image_url}' t = parse_resolution(image_id) t.update({ 'id': image_id, - 'url': image_url + 'url': image_url, }) thumbnails.append(t) @@ -135,7 +134,7 @@ class PuhuTVIE(InfoExtractor): if not isinstance(genre, dict): continue genre_name = genre.get('name') - if genre_name and isinstance(genre_name, compat_str): + if genre_name and isinstance(genre_name, str): tags.append(genre_name) subtitles = {} @@ -144,10 +143,10 @@ class PuhuTVIE(InfoExtractor): continue lang = subtitle.get('language') sub_url = url_or_none(subtitle.get('url') or subtitle.get('file')) - if not lang or not 
isinstance(lang, compat_str) or not sub_url: + if not lang or not isinstance(lang, str) or not sub_url: continue subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ - 'url': sub_url + 'url': sub_url, }] return { @@ -166,7 +165,7 @@ class PuhuTVIE(InfoExtractor): 'tags': tags, 'subtitles': subtitles, 'thumbnails': thumbnails, - 'formats': formats + 'formats': formats, } @@ -195,8 +194,8 @@ class PuhuTVSerieIE(InfoExtractor): has_more = True while has_more is True: season = self._download_json( - 'https://galadriel.puhutv.com/seasons/%s' % season_id, - season_id, 'Downloading page %s' % page, query={ + f'https://galadriel.puhutv.com/seasons/{season_id}', + season_id, f'Downloading page {page}', query={ 'page': page, 'per': 40, }) @@ -208,7 +207,7 @@ class PuhuTVSerieIE(InfoExtractor): continue video_id = str_or_none(int_or_none(ep.get('id'))) yield self.url_result( - 'https://puhutv.com/%s' % slug_path, + f'https://puhutv.com/{slug_path}', ie=PuhuTVIE.ie_key(), video_id=video_id, video_title=ep.get('name') or ep.get('eventLabel')) page += 1 @@ -218,7 +217,7 @@ class PuhuTVSerieIE(InfoExtractor): playlist_id = self._match_id(url) info = self._download_json( - urljoin(url, '/api/slug/%s-detay' % playlist_id), + urljoin(url, f'/api/slug/{playlist_id}-detay'), playlist_id)['data'] seasons = info.get('seasons') @@ -229,5 +228,5 @@ class PuhuTVSerieIE(InfoExtractor): # For films, these are using same url with series video_id = info.get('slug') or info['assets'][0]['slug'] return self.url_result( - 'https://puhutv.com/%s-izle' % video_id, + f'https://puhutv.com/{video_id}-izle', PuhuTVIE.ie_key(), video_id) diff --git a/yt_dlp/extractor/puls4.py b/yt_dlp/extractor/puls4.py index 38c5d11..b43f035 100644 --- a/yt_dlp/extractor/puls4.py +++ b/yt_dlp/extractor/puls4.py @@ -1,5 +1,4 @@ from .prosiebensat1 import ProSiebenSat1BaseIE -from ..compat import compat_str from ..utils import parse_duration, unified_strdate @@ -37,7 +36,7 @@ class Puls4IE(ProSiebenSat1BaseIE): 
player_content = media['playerContent'] info = self._extract_video_info(url, player_content['id']) info.update({ - 'id': compat_str(media['objectId']), + 'id': str(media['objectId']), 'title': player_content['title'], 'description': media.get('description'), 'thumbnail': media.get('previewLink'), diff --git a/yt_dlp/extractor/pyvideo.py b/yt_dlp/extractor/pyvideo.py index 7b25166..6ae3155 100644 --- a/yt_dlp/extractor/pyvideo.py +++ b/yt_dlp/extractor/pyvideo.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import int_or_none @@ -32,8 +31,8 @@ class PyvideoIE(InfoExtractor): entries = [] data = self._download_json( - 'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json' - % (category, video_id), video_id, fatal=False) + f'https://raw.githubusercontent.com/pyvideo/data/master/{category}/videos/{video_id}.json', + video_id, fatal=False) if data: for video in data['videos']: @@ -43,7 +42,7 @@ class PyvideoIE(InfoExtractor): entries.append(self.url_result(video_url, 'Youtube')) else: entries.append({ - 'id': compat_str(data.get('id') or video_id), + 'id': str(data.get('id') or video_id), 'url': video_url, 'title': data['title'], 'description': data.get('description') or data.get('summary'), diff --git a/yt_dlp/extractor/qingting.py b/yt_dlp/extractor/qingting.py index cb00de2..74fecee 100644 --- a/yt_dlp/extractor/qingting.py +++ b/yt_dlp/extractor/qingting.py @@ -14,7 +14,7 @@ class QingTingIE(InfoExtractor): 'channel': '睡前消息', 'uploader': '马督工', 'ext': 'm4a', - } + }, }, { 'url': 'https://m.qtfm.cn/vchannels/378005/programs/23023573/', 'md5': '2703120b6abe63b5fa90b975a58f4c0e', @@ -25,7 +25,7 @@ class QingTingIE(InfoExtractor): 'channel': '睡前消息', 'uploader': '马督工', 'ext': 'm4a', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py index 90141e6..d023869 100644 --- a/yt_dlp/extractor/qqmusic.py +++ b/yt_dlp/extractor/qqmusic.py @@ 
-1,48 +1,125 @@ +import base64 +import functools +import json import random -import re import time from .common import InfoExtractor from ..utils import ( ExtractorError, + OnDemandPagedList, clean_html, + int_or_none, + join_nonempty, + js_to_json, + str_or_none, strip_jsonp, + traverse_obj, unescapeHTML, + url_or_none, + urljoin, ) -class QQMusicIE(InfoExtractor): +class QQMusicBaseIE(InfoExtractor): + def _get_cookie(self, key, default=None): + return getattr(self._get_cookies('https://y.qq.com').get(key), 'value', default) + + def _get_g_tk(self): + n = 5381 + for c in self._get_cookie('qqmusic_key', ''): + n += (n << 5) + ord(c) + return n & 2147483647 + + def _get_uin(self): + return int_or_none(self._get_cookie('uin')) or 0 + + @property + def is_logged_in(self): + return bool(self._get_uin() and self._get_cookie('fqm_pvqid')) + + # Reference: m_r_GetRUin() in top_player.js + # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js + @staticmethod + def _m_r_get_ruin(): + cur_ms = int(time.time() * 1000) % 1000 + return int(round(random.random() * 2147483647) * cur_ms % 1E10) + + def _download_init_data(self, url, mid, fatal=True): + webpage = self._download_webpage(url, mid, fatal=fatal) + return self._search_json(r'window\.__INITIAL_DATA__\s*=', webpage, + 'init data', mid, transform_source=js_to_json, fatal=fatal) + + def _make_fcu_req(self, req_dict, mid, headers={}, **kwargs): + return self._download_json( + 'https://u.y.qq.com/cgi-bin/musicu.fcg', mid, data=json.dumps({ + 'comm': { + 'cv': 0, + 'ct': 24, + 'format': 'json', + 'uin': self._get_uin(), + }, + **req_dict, + }, separators=(',', ':')).encode(), headers=headers, **kwargs) + + +class QQMusicIE(QQMusicBaseIE): IE_NAME = 'qqmusic' IE_DESC = 'QQ音乐' - _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html' + _VALID_URL = r'https?://y\.qq\.com/n/ryqq/songDetail/(?P<id>[0-9A-Za-z]+)' _TESTS = [{ - 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html', + 'url': 
'https://y.qq.com/n/ryqq/songDetail/004Ti8rT003TaZ', + 'md5': 'd7adc5c438d12e2cb648cca81593fd47', + 'info_dict': { + 'id': '004Ti8rT003TaZ', + 'ext': 'mp3', + 'title': '永夜のパレード (永夜的游行)', + 'album': '幻想遊園郷 -Fantastic Park-', + 'release_date': '20111230', + 'duration': 281, + 'creators': ['ケーキ姫', 'JUMA'], + 'genres': ['Pop'], + 'description': 'md5:b5261f3d595657ae561e9e6aee7eb7d9', + 'size': 4501244, + 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', + 'subtitles': 'count:1', + }, + }, { + 'url': 'https://y.qq.com/n/ryqq/songDetail/004295Et37taLD', 'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8', 'info_dict': { 'id': '004295Et37taLD', 'ext': 'mp3', 'title': '可惜没如果', - 'release_date': '20141227', - 'creator': '林俊杰', - 'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac', - 'thumbnail': r're:^https?://.*\.jpg$', - } + 'album': '新地球 - 人 (Special Edition)', + 'release_date': '20150129', + 'duration': 298, + 'creators': ['林俊杰'], + 'genres': ['Pop'], + 'description': 'md5:f568421ff618d2066e74b65a04149c4e', + 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', + }, + 'skip': 'premium member only', }, { 'note': 'There is no mp3-320 version of this song.', - 'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html', - 'md5': 'fa3926f0c585cda0af8fa4f796482e3e', + 'url': 'https://y.qq.com/n/ryqq/songDetail/004MsGEo3DdNxV', + 'md5': '028aaef1ae13d8a9f4861a92614887f9', 'info_dict': { 'id': '004MsGEo3DdNxV', 'ext': 'mp3', 'title': '如果', + 'album': '新传媒电视连续剧金曲系列II', 'release_date': '20050626', - 'creator': '李季美', - 'description': 'md5:46857d5ed62bc4ba84607a805dccf437', - 'thumbnail': r're:^https?://.*\.jpg$', - } + 'duration': 220, + 'creators': ['李季美'], + 'genres': [], + 'description': 'md5:fc711212aa623b28534954dc4bd67385', + 'size': 3535730, + 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', + }, }, { 'note': 'lyrics not in .lrc format', - 'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html', + 'url': 'https://y.qq.com/n/ryqq/songDetail/001JyApY11tIp6', 'info_dict': { 'id': 
'001JyApY11tIp6', 'ext': 'mp3', @@ -50,186 +127,193 @@ class QQMusicIE(InfoExtractor): 'release_date': '19970225', 'creator': 'Dark Funeral', 'description': 'md5:c9b20210587cbcd6836a1c597bab4525', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - 'params': { - 'skip_download': True, + 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', }, + 'params': {'skip_download': True}, + 'skip': 'no longer available', }] _FORMATS = { - 'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320}, - 'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128}, - 'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10} + 'F000': {'name': 'flac', 'prefix': 'F000', 'ext': 'flac', 'preference': 60}, + 'A000': {'name': 'ape', 'prefix': 'A000', 'ext': 'ape', 'preference': 50}, + 'M800': {'name': '320mp3', 'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320}, + 'M500': {'name': '128mp3', 'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128}, + 'C400': {'name': '96aac', 'prefix': 'C400', 'ext': 'm4a', 'preference': 20, 'abr': 96}, + 'C200': {'name': '48aac', 'prefix': 'C200', 'ext': 'm4a', 'preference': 20, 'abr': 48}, } - # Reference: m_r_GetRUin() in top_player.js - # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js - @staticmethod - def m_r_get_ruin(): - curMs = int(time.time() * 1000) % 1000 - return int(round(random.random() * 2147483647) * curMs % 1E10) - def _real_extract(self, url): mid = self._match_id(url) - detail_info_page = self._download_webpage( - 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid, - mid, note='Download song detail info', - errnote='Unable to get song detail info', encoding='gbk') - - song_name = self._html_search_regex( - r"songname:\s*'([^']+)'", detail_info_page, 'song name') - - publish_time = self._html_search_regex( - r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page, - 'publish time', default=None) - if publish_time: - publish_time = publish_time.replace('-', '') 
- - singer = self._html_search_regex( - r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None) - - lrc_content = self._html_search_regex( - r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>', - detail_info_page, 'LRC lyrics', default=None) - if lrc_content: - lrc_content = lrc_content.replace('\\n', '\n') - - thumbnail_url = None - albummid = self._search_regex( - [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'], - detail_info_page, 'album mid', default=None) - if albummid: - thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \ - % (albummid[-2:-1], albummid[-1], albummid) - - guid = self.m_r_get_ruin() - - vkey = self._download_json( - 'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid, - mid, note='Retrieve vkey', errnote='Unable to get vkey', - transform_source=strip_jsonp)['key'] - + init_data = self._download_init_data(url, mid, fatal=False) + info_data = self._make_fcu_req({'info': { + 'module': 'music.pf_song_detail_svr', + 'method': 'get_song_detail_yqq', + 'param': { + 'song_mid': mid, + 'song_type': 0, + }, + }}, mid, note='Downloading song info')['info']['data']['track_info'] + + media_mid = info_data['file']['media_mid'] + + data = self._make_fcu_req({ + 'req_1': { + 'module': 'vkey.GetVkeyServer', + 'method': 'CgiGetVkey', + 'param': { + 'guid': str(self._m_r_get_ruin()), + 'songmid': [mid] * len(self._FORMATS), + 'songtype': [0] * len(self._FORMATS), + 'uin': str(self._get_uin()), + 'loginflag': 1, + 'platform': '20', + 'filename': [f'{f["prefix"]}{media_mid}.{f["ext"]}' for f in self._FORMATS.values()], + }, + }, + 'req_2': { + 'module': 'music.musichallSong.PlayLyricInfo', + 'method': 'GetPlayLyricInfo', + 'param': {'songMID': mid}, + }, + }, mid, note='Downloading formats and lyric', headers=self.geo_verification_headers()) + + code = traverse_obj(data, ('req_1', 'code', {int})) + if code != 0: + raise ExtractorError(f'Failed to download format info, error code 
{code or "unknown"}') formats = [] - for format_id, details in self._FORMATS.items(): + for media_info in traverse_obj(data, ( + 'req_1', 'data', 'midurlinfo', lambda _, v: v['songmid'] == mid and v['purl']), + ): + format_key = traverse_obj(media_info, ('filename', {str}, {lambda x: x[:4]})) + format_info = self._FORMATS.get(format_key) or {} + format_id = format_info.get('name') formats.append({ - 'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0' - % (details['prefix'], mid, details['ext'], vkey, guid), + 'url': urljoin('https://dl.stream.qqmusic.qq.com', media_info['purl']), 'format': format_id, 'format_id': format_id, - 'quality': details['preference'], - 'abr': details.get('abr'), + 'size': traverse_obj(info_data, ('file', f'size_{format_id}', {int_or_none})), + 'quality': format_info.get('preference'), + 'abr': format_info.get('abr'), + 'ext': format_info.get('ext'), + 'vcodec': 'none', }) - self._check_formats(formats, mid) - actual_lrc_lyrics = ''.join( - line + '\n' for line in re.findall( - r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content)) + if not formats and not self.is_logged_in: + self.raise_login_required() + + if traverse_obj(data, ('req_2', 'code')): + self.report_warning(f'Failed to download lyric, error {data["req_2"]["code"]!r}') + lrc_content = traverse_obj(data, ('req_2', 'data', 'lyric', {lambda x: base64.b64decode(x).decode('utf-8')})) info_dict = { 'id': mid, 'formats': formats, - 'title': song_name, - 'release_date': publish_time, - 'creator': singer, - 'description': lrc_content, - 'thumbnail': thumbnail_url + **traverse_obj(info_data, { + 'title': ('title', {str}), + 'album': ('album', 'title', {str}, {lambda x: x or None}), + 'release_date': ('time_public', {lambda x: x.replace('-', '') or None}), + 'creators': ('singer', ..., 'name', {str}), + 'alt_title': ('subtitle', {str}, {lambda x: x or None}), + 'duration': ('interval', {int_or_none}), + }), + **traverse_obj(init_data, 
('detail', { + 'thumbnail': ('picurl', {url_or_none}), + 'description': ('info', 'intro', 'content', ..., 'value', {str}), + 'genres': ('info', 'genre', 'content', ..., 'value', {str}, all), + }), get_all=False), } - if actual_lrc_lyrics: - info_dict['subtitles'] = { - 'origin': [{ - 'ext': 'lrc', - 'data': actual_lrc_lyrics, - }] - } + if lrc_content: + info_dict['subtitles'] = {'origin': [{'ext': 'lrc', 'data': lrc_content}]} + info_dict['description'] = join_nonempty(info_dict.get('description'), lrc_content, delim='\n') return info_dict -class QQPlaylistBaseIE(InfoExtractor): - @staticmethod - def qq_static_url(category, mid): - return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid) - - def get_singer_all_songs(self, singmid, num): - return self._download_webpage( - r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid, - query={ - 'format': 'json', - 'inCharset': 'utf8', - 'outCharset': 'utf-8', - 'platform': 'yqq', - 'needNewCode': 0, - 'singermid': singmid, - 'order': 'listen', - 'begin': 0, - 'num': num, - 'songstatus': 1, - }) - - def get_entries_from_page(self, singmid): - entries = [] - - default_num = 1 - json_text = self.get_singer_all_songs(singmid, default_num) - json_obj_all_songs = self._parse_json(json_text, singmid) - - if json_obj_all_songs['code'] == 0: - total = json_obj_all_songs['data']['total'] - json_text = self.get_singer_all_songs(singmid, total) - json_obj_all_songs = self._parse_json(json_text, singmid) - - for item in json_obj_all_songs['data']['list']: - if item['musicData'].get('songmid') is not None: - songmid = item['musicData']['songmid'] - entries.append(self.url_result( - r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid)) - - return entries - - -class QQMusicSingerIE(QQPlaylistBaseIE): +class QQMusicSingerIE(QQMusicBaseIE): IE_NAME = 'qqmusic:singer' IE_DESC = 'QQ音乐 - 歌手' - _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html' - _TEST = { - 
'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html', + _VALID_URL = r'https?://y\.qq\.com/n/ryqq/singer/(?P<id>[0-9A-Za-z]+)' + _TESTS = [{ + 'url': 'https://y.qq.com/n/ryqq/singer/001BLpXF2DyJe2', 'info_dict': { 'id': '001BLpXF2DyJe2', 'title': '林俊杰', - 'description': 'md5:870ec08f7d8547c29c93010899103751', + 'description': 'md5:10624ce73b06fa400bc846f59b0305fa', + 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', }, - 'playlist_mincount': 12, - } + 'playlist_mincount': 100, + }, { + 'url': 'https://y.qq.com/n/ryqq/singer/000Q00f213YzNV', + 'info_dict': { + 'id': '000Q00f213YzNV', + 'title': '桃几OvO', + 'description': '小破站小唱见~希望大家喜欢听我唱歌~!', + 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', + }, + 'playlist_count': 12, + 'playlist': [{ + 'info_dict': { + 'id': '0016cvsy02mmCl', + 'ext': 'mp3', + 'title': '群青', + 'album': '桃几2021年翻唱集', + 'release_date': '20210913', + 'duration': 248, + 'creators': ['桃几OvO'], + 'genres': ['Pop'], + 'description': 'md5:4296005a04edcb5cdbe0889d5055a7ae', + 'size': 3970822, + 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', + }, + }], + }] + + _PAGE_SIZE = 50 + + def _fetch_page(self, mid, page_size, page_num): + data = self._make_fcu_req({'req_1': { + 'module': 'music.web_singer_info_svr', + 'method': 'get_singer_detail_info', + 'param': { + 'sort': 5, + 'singermid': mid, + 'sin': page_num * page_size, + 'num': page_size, + }}}, mid, note=f'Downloading page {page_num}') + yield from traverse_obj(data, ('req_1', 'data', 'songlist', ..., {lambda x: self.url_result( + f'https://y.qq.com/n/ryqq/songDetail/{x["mid"]}', QQMusicIE, x['mid'], x.get('title'))})) def _real_extract(self, url): mid = self._match_id(url) + init_data = self._download_init_data(url, mid, fatal=False) - entries = self.get_entries_from_page(mid) - singer_page = self._download_webpage(url, mid, 'Download singer page') - singer_name = self._html_search_regex( - r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None) - singer_desc = None - - if mid: - 
singer_desc_page = self._download_xml( - 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid, - 'Donwload singer description XML', - query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid}, - headers={'Referer': 'https://y.qq.com/n/yqq/singer/'}) + return self.playlist_result( + OnDemandPagedList(functools.partial(self._fetch_page, mid, self._PAGE_SIZE), self._PAGE_SIZE), + mid, **traverse_obj(init_data, ('singerDetail', { + 'title': ('basic_info', 'name', {str}), + 'description': ('ex_info', 'desc', {str}), + 'thumbnail': ('pic', 'pic', {url_or_none}), + }))) - singer_desc = singer_desc_page.find('./data/info/desc').text - return self.playlist_result(entries, mid, singer_name, singer_desc) +class QQPlaylistBaseIE(InfoExtractor): + def _extract_entries(self, info_json, path): + for song in traverse_obj(info_json, path): + song_mid = song.get('songmid') + if not song_mid: + continue + yield self.url_result( + f'https://y.qq.com/n/ryqq/songDetail/{song_mid}', + QQMusicIE, song_mid, song.get('songname')) class QQMusicAlbumIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:album' IE_DESC = 'QQ音乐 - 专辑' - _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html' + _VALID_URL = r'https?://y\.qq\.com/n/ryqq/albumDetail/(?P<id>[0-9A-Za-z]+)' _TESTS = [{ - 'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html', + 'url': 'https://y.qq.com/n/ryqq/albumDetail/000gXCTb2AhRR1', 'info_dict': { 'id': '000gXCTb2AhRR1', 'title': '我们都是这样长大的', @@ -237,10 +321,10 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): }, 'playlist_count': 4, }, { - 'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html', + 'url': 'https://y.qq.com/n/ryqq/albumDetail/002Y5a3b3AlCu3', 'info_dict': { 'id': '002Y5a3b3AlCu3', - 'title': '그리고...', + 'title': '그리고…', 'description': 'md5:a48823755615508a95080e81b51ba729', }, 'playlist_count': 8, @@ -249,49 +333,45 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): def _real_extract(self, url): mid = self._match_id(url) - album = 
self._download_json( - 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid, - mid, 'Download album page')['data'] + album_json = self._download_json( + 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg', + mid, 'Download album page', + query={'albummid': mid, 'format': 'json'})['data'] - entries = [ - self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'] - ) for song in album['list'] - ] - album_name = album.get('name') - album_detail = album.get('desc') - if album_detail is not None: - album_detail = album_detail.strip() + entries = self._extract_entries(album_json, ('list', ...)) - return self.playlist_result(entries, mid, album_name, album_detail) + return self.playlist_result(entries, mid, **traverse_obj(album_json, { + 'title': ('name', {str}), + 'description': ('desc', {str.strip}), + })) class QQMusicToplistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:toplist' IE_DESC = 'QQ音乐 - 排行榜' - _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://y\.qq\.com/n/ryqq/toplist/(?P<id>[0-9]+)' _TESTS = [{ - 'url': 'https://y.qq.com/n/yqq/toplist/123.html', + 'url': 'https://y.qq.com/n/ryqq/toplist/123', 'info_dict': { 'id': '123', - 'title': '美国iTunes榜', - 'description': 'md5:89db2335fdbb10678dee2d43fe9aba08', + 'title': r're:美国热门音乐榜 \d{4}-\d{2}-\d{2}', + 'description': '美国热门音乐榜,每周一更新。', }, - 'playlist_count': 100, + 'playlist_count': 95, }, { - 'url': 'https://y.qq.com/n/yqq/toplist/3.html', + 'url': 'https://y.qq.com/n/ryqq/toplist/3', 'info_dict': { 'id': '3', - 'title': '巅峰榜·欧美', - 'description': 'md5:5a600d42c01696b26b71f8c4d43407da', + 'title': r're:巅峰榜·欧美 \d{4}-\d{2}-\d{2}', + 'description': 'md5:4def03b60d3644be4c9a36f21fd33857', }, 'playlist_count': 100, }, { - 'url': 'https://y.qq.com/n/yqq/toplist/106.html', + 'url': 'https://y.qq.com/n/ryqq/toplist/106', 'info_dict': { 'id': '106', - 'title': '韩国Mnet榜', + 'title': r're:韩国Mnet榜 
\d{4}-\d{2}-\d{2}', 'description': 'md5:cb84b325215e1d21708c615cac82a6e7', }, 'playlist_count': 50, @@ -305,33 +385,20 @@ class QQMusicToplistIE(QQPlaylistBaseIE): note='Download toplist page', query={'type': 'toplist', 'topid': list_id, 'format': 'json'}) - entries = [self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic', - song['data']['songmid']) - for song in toplist_json['songlist']] - - topinfo = toplist_json.get('topinfo', {}) - list_name = topinfo.get('ListName') - list_description = topinfo.get('info') - return self.playlist_result(entries, list_id, list_name, list_description) + return self.playlist_result( + self._extract_entries(toplist_json, ('songlist', ..., 'data')), list_id, + playlist_title=join_nonempty(*traverse_obj( + toplist_json, ((('topinfo', 'ListName'), 'update_time'), None)), delim=' '), + playlist_description=traverse_obj(toplist_json, ('topinfo', 'info'))) class QQMusicPlaylistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:playlist' IE_DESC = 'QQ音乐 - 歌单' - _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://y\.qq\.com/n/ryqq/playlist/(?P<id>[0-9]+)' _TESTS = [{ - 'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html', - 'info_dict': { - 'id': '3462654915', - 'title': '韩国5月新歌精选下旬', - 'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4', - }, - 'playlist_count': 40, - 'skip': 'playlist gone', - }, { - 'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html', + 'url': 'https://y.qq.com/n/ryqq/playlist/1374105607', 'info_dict': { 'id': '1374105607', 'title': '易入人心的华语民谣', @@ -347,19 +414,83 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE): 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg', list_id, 'Download list page', query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id}, - transform_source=strip_jsonp) + transform_source=strip_jsonp, headers={'Referer': url}) if not len(list_json.get('cdlist', [])): - if 
list_json.get('code'): - raise ExtractorError( - 'QQ Music said: error %d in fetching playlist info' % list_json['code'], - expected=True) - raise ExtractorError('Unable to get playlist info') - - cdlist = list_json['cdlist'][0] - entries = [self.url_result( - 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']) - for song in cdlist['songlist']] - - list_name = cdlist.get('dissname') - list_description = clean_html(unescapeHTML(cdlist.get('desc'))) - return self.playlist_result(entries, list_id, list_name, list_description) + raise ExtractorError(join_nonempty( + 'Unable to get playlist info', + join_nonempty('code', 'subcode', from_dict=list_json), + list_json.get('msg'), delim=': ')) + + entries = self._extract_entries(list_json, ('cdlist', 0, 'songlist', ...)) + + return self.playlist_result(entries, list_id, **traverse_obj(list_json, ('cdlist', 0, { + 'title': ('dissname', {str}), + 'description': ('desc', {unescapeHTML}, {clean_html}), + }))) + + +class QQMusicVideoIE(QQMusicBaseIE): + IE_NAME = 'qqmusic:mv' + IE_DESC = 'QQ音乐 - MV' + _VALID_URL = r'https?://y\.qq\.com/n/ryqq/mv/(?P<id>[0-9A-Za-z]+)' + + _TESTS = [{ + 'url': 'https://y.qq.com/n/ryqq/mv/002Vsarh3SVU8K', + 'info_dict': { + 'id': '002Vsarh3SVU8K', + 'ext': 'mp4', + 'title': 'The Chant (Extended Mix / Audio)', + 'description': '', + 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', + 'release_timestamp': 1688918400, + 'release_date': '20230709', + 'duration': 313, + 'creators': ['Duke Dumont'], + 'view_count': int, + }, + }] + + def _parse_url_formats(self, url_data): + return traverse_obj(url_data, ('mp4', lambda _, v: v['freeflow_url'], { + 'url': ('freeflow_url', 0, {url_or_none}), + 'filesize': ('fileSize', {int_or_none}), + 'format_id': ('newFileType', {str_or_none}), + })) + + def _real_extract(self, url): + video_id = self._match_id(url) + + video_info = self._make_fcu_req({ + 'mvInfo': { + 'module': 'music.video.VideoData', + 'method': 
'get_video_info_batch', + 'param': { + 'vidlist': [video_id], + 'required': [ + 'vid', 'type', 'sid', 'cover_pic', 'duration', 'singers', + 'video_pay', 'hint', 'code', 'msg', 'name', 'desc', + 'playcnt', 'pubdate', 'play_forbid_reason'], + }, + }, + 'mvUrl': { + 'module': 'music.stream.MvUrlProxy', + 'method': 'GetMvUrls', + 'param': {'vids': [video_id]}, + }, + }, video_id, headers=self.geo_verification_headers()) + if traverse_obj(video_info, ('mvInfo', 'data', video_id, 'play_forbid_reason')) == 3: + self.raise_geo_restricted() + + return { + 'id': video_id, + 'formats': self._parse_url_formats(traverse_obj(video_info, ('mvUrl', 'data', video_id))), + **traverse_obj(video_info, ('mvInfo', 'data', video_id, { + 'title': ('name', {str}), + 'description': ('desc', {str}), + 'thumbnail': ('cover_pic', {url_or_none}), + 'release_timestamp': ('pubdate', {int_or_none}), + 'duration': ('duration', {int_or_none}), + 'creators': ('singers', ..., 'name', {str}), + 'view_count': ('playcnt', {int_or_none}), + })), + } diff --git a/yt_dlp/extractor/r7.py b/yt_dlp/extractor/r7.py index 36f0b52..79a4c0a 100644 --- a/yt_dlp/extractor/r7.py +++ b/yt_dlp/extractor/r7.py @@ -42,7 +42,7 @@ class R7IE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'http://player-api.r7.com/video/i/%s' % video_id, video_id) + f'http://player-api.r7.com/video/i/{video_id}', video_id) title = video['title'] @@ -98,7 +98,7 @@ class R7ArticleIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if R7IE.suitable(url) else super(R7ArticleIE, cls).suitable(url) + return False if R7IE.suitable(url) else super().suitable(url) def _real_extract(self, url): display_id = self._match_id(url) @@ -109,4 +109,4 @@ class R7ArticleIE(InfoExtractor): r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})', webpage, 'video id') - return self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key()) + return 
self.url_result(f'http://player.r7.com/video/i/{video_id}', R7IE.ie_key()) diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py index f013582..b0b6681 100644 --- a/yt_dlp/extractor/radiko.py +++ b/yt_dlp/extractor/radiko.py @@ -100,8 +100,8 @@ class RadikoBaseIE(InfoExtractor): def _find_program(self, video_id, station, cursor): station_program = self._download_xml( - 'https://radiko.jp/v3/program/station/weekly/%s.xml' % station, video_id, - note='Downloading radio program for %s station' % station) + f'https://radiko.jp/v3/program/station/weekly/{station}.xml', video_id, + note=f'Downloading radio program for {station} station') prog = None for p in station_program.findall('.//prog'): @@ -207,8 +207,8 @@ class RadikoIE(RadikoBaseIE): 'ft': radio_begin, 'end_at': radio_end, 'to': radio_end, - 'seek': video_id - } + 'seek': video_id, + }, ), } diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py index 4a09dcd..950b9ec 100644 --- a/yt_dlp/extractor/radiocanada.py +++ b/yt_dlp/extractor/radiocanada.py @@ -24,7 +24,7 @@ class RadioCanadaIE(InfoExtractor): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # empty Title @@ -50,7 +50,7 @@ class RadioCanadaIE(InfoExtractor): 'series': 'District 31', }, 'only_matching': True, - } + }, ] _GEO_COUNTRIES = ['CA'] _access_token = None @@ -111,7 +111,7 @@ class RadioCanadaIE(InfoExtractor): if error == 'Le contenu sélectionné est disponible seulement en premium': self.raise_login_required(error) raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error), expected=True) + f'{self.IE_NAME} said: {error}', expected=True) formats = self._extract_m3u8_formats(v_url, video_id, 'mp4') subtitles = {} @@ -162,4 +162,4 @@ class RadioCanadaAudioVideoIE(InfoExtractor): }] def _real_extract(self, url): - return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) + return self.url_result(f'radiocanada:medianet:{self._match_id(url)}') diff --git 
a/yt_dlp/extractor/radiocomercial.py b/yt_dlp/extractor/radiocomercial.py index 0c21977..7e4609a 100644 --- a/yt_dlp/extractor/radiocomercial.py +++ b/yt_dlp/extractor/radiocomercial.py @@ -32,7 +32,7 @@ class RadioComercialIE(InfoExtractor): 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', 'season': 'Season 6', 'season_number': 6, - } + }, }, { 'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3/convenca-me-num-minuto-que-os-lobisomens-existem', 'md5': '47e96c273aef96a8eb160cd6cf46d782', @@ -44,7 +44,7 @@ class RadioComercialIE(InfoExtractor): 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', 'season': 'Season 3', 'season_number': 3, - } + }, }, { 'url': 'https://radiocomercial.pt/podcasts/inacreditavel-by-ines-castel-branco/t2/o-desastre-de-aviao', 'md5': '69be64255420fec23b7259955d771e54', @@ -73,7 +73,7 @@ class RadioComercialIE(InfoExtractor): 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', 'season': 'Season 2023', 'season_number': 2023, - } + }, }] def _real_extract(self, url): @@ -99,28 +99,28 @@ class RadioComercialPlaylistIE(InfoExtractor): 'id': 'convenca-me-num-minuto_t3', 'title': 'Convença-me num Minuto - Temporada 3', }, - 'playlist_mincount': 32 + 'playlist_mincount': 32, }, { 'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao', 'info_dict': { 'id': 'o-homem-que-mordeu-o-cao', 'title': 'O Homem Que Mordeu o Cão', }, - 'playlist_mincount': 19 + 'playlist_mincount': 19, }, { 'url': 'https://radiocomercial.pt/podcasts/as-minhas-coisas-favoritas', 'info_dict': { 'id': 'as-minhas-coisas-favoritas', 'title': 'As Minhas Coisas Favoritas', }, - 'playlist_mincount': 131 + 'playlist_mincount': 131, }, { 'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/t2023', 'info_dict': { 'id': 'tnt-todos-no-top_t2023', 'title': 'TNT - Todos No Top - Temporada 2023', }, - 'playlist_mincount': 39 + 'playlist_mincount': 39, }] def _entries(self, url, playlist_id): diff --git 
a/yt_dlp/extractor/radiode.py b/yt_dlp/extractor/radiode.py index 7262078..1bf7449 100644 --- a/yt_dlp/extractor/radiode.py +++ b/yt_dlp/extractor/radiode.py @@ -17,7 +17,7 @@ class RadioDeIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, } def _real_extract(self, url): @@ -37,7 +37,7 @@ class RadioDeIE(InfoExtractor): 'ext': stream['streamContentFormat'].lower(), 'acodec': stream['streamContentFormat'], 'abr': stream['bitRate'], - 'asr': stream['sampleRate'] + 'asr': stream['sampleRate'], } for stream in broadcast['streamUrls']] return { diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py index 6bd6fe9..ff21963 100644 --- a/yt_dlp/extractor/radiofrance.py +++ b/yt_dlp/extractor/radiofrance.py @@ -126,7 +126,7 @@ class FranceCultureIE(RadioFranceBaseIE): }, { 'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-sciences/sante-bientot-un-vaccin-contre-l-asthme-allergique-3057200', 'only_matching': True, - } + }, ] def _real_extract(self, url): @@ -150,7 +150,7 @@ class FranceCultureIE(RadioFranceBaseIE): 'uploader': self._html_search_regex( r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None), 'upload_date': unified_strdate(self._search_regex( - r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False)) + r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False)), } diff --git a/yt_dlp/extractor/radiojavan.py b/yt_dlp/extractor/radiojavan.py index b3befae..53cbbe3 100644 --- a/yt_dlp/extractor/radiojavan.py +++ b/yt_dlp/extractor/radiojavan.py @@ -25,7 +25,7 @@ class RadioJavanIE(InfoExtractor): 'view_count': int, 'like_count': int, 'dislike_count': int, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/radiokapital.py b/yt_dlp/extractor/radiokapital.py index 5d7d3dd..2d08708 100644 --- a/yt_dlp/extractor/radiokapital.py +++ b/yt_dlp/extractor/radiokapital.py @@ -12,7 +12,7 @@ class RadioKapitalBaseIE(InfoExtractor): video_id, note=note) def 
_parse_episode(self, data): - release = '%s%s%s' % (data['published'][6:11], data['published'][3:6], data['published'][:3]) + release = '{}{}{}'.format(data['published'][6:11], data['published'][3:6], data['published'][:3]) return { '_type': 'url_transparent', 'url': data['mixcloud_url'], @@ -46,7 +46,7 @@ class RadioKapitalIE(RadioKapitalBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - episode = self._call_api('episodes/%s' % video_id, video_id) + episode = self._call_api(f'episodes/{video_id}', video_id) return self._parse_episode(episode) diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py index 632c8c2..e91b3b2 100644 --- a/yt_dlp/extractor/radiozet.py +++ b/yt_dlp/extractor/radiozet.py @@ -21,7 +21,7 @@ class RadioZetPodcastIE(InfoExtractor): 'duration': 83, 'series': 'Nie Ma Za Co', 'creator': 'Katarzyna Pakosińska', - } + }, } def _call_api(self, podcast_id, display_id): diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py index 325e278..6050892 100644 --- a/yt_dlp/extractor/radlive.py +++ b/yt_dlp/extractor/radlive.py @@ -27,7 +27,7 @@ class RadLiveIE(InfoExtractor): 'channel': 'Proximity', 'channel_id': '9ce6dd01-70a4-4d59-afb6-d01f807cd009', 'channel_url': 'https://rad.live/content/channel/9ce6dd01-70a4-4d59-afb6-d01f807cd009', - } + }, }, { 'url': 'https://rad.live/content/episode/bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf', 'md5': '40b2175f347592125d93e9a344080125', @@ -104,7 +104,7 @@ class RadLiveSeasonIE(RadLiveIE): # XXX: Do not subclass from concrete IE @classmethod def suitable(cls, url): - return False if RadLiveIE.suitable(url) else super(RadLiveSeasonIE, cls).suitable(url) + return False if RadLiveIE.suitable(url) else super().suitable(url) def _real_extract(self, url): season_id = self._match_id(url) @@ -154,7 +154,7 @@ query WebChannelListing ($lrn: ID!) 
{ @classmethod def suitable(cls, url): - return False if RadLiveIE.suitable(url) else super(RadLiveChannelIE, cls).suitable(url) + return False if RadLiveIE.suitable(url) else super().suitable(url) def _real_extract(self, url): channel_id = self._match_id(url) @@ -164,8 +164,8 @@ query WebChannelListing ($lrn: ID!) { headers={'Content-Type': 'application/json'}, data=json.dumps({ 'query': self._QUERY, - 'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'} - }).encode('utf-8')) + 'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'}, + }).encode()) data = traverse_obj(graphql, ('data', 'channel')) if not data: diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index c2e7a6f..efb47af 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -143,7 +143,7 @@ class RaiBaseIE(InfoExtractor): } def percentage(number, target, pc=20, roof=125): - '''check if the target is in the range of number +/- percent''' + """check if the target is in the range of number +/- percent""" if not number or number < 0: return False return abs(target - number) < min(float(number) * float(pc) / 100.0, roof) @@ -199,7 +199,7 @@ class RaiBaseIE(InfoExtractor): # filter out single-stream formats fmts = [f for f in fmts - if not f.get('vcodec') == 'none' and not f.get('acodec') == 'none'] + if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] mobj = re.search(_MANIFEST_REG, manifest_url) if not mobj: @@ -213,7 +213,7 @@ class RaiBaseIE(InfoExtractor): 'url': _MP4_TMPL % (relinker_url, q), 'protocol': 'https', 'ext': 'mp4', - **get_format_info(q) + **get_format_info(q), }) return formats @@ -299,7 +299,7 @@ class RaiPlayIE(RaiBaseIE): 'formats': 'count:7', }, 'params': {'skip_download': True}, - 'expected_warnings': ['Video not available. Likely due to geo-restriction.'] + 'expected_warnings': ['Video not available. 
Likely due to geo-restriction.'], }, { # 1500 quality 'url': 'https://www.raiplay.it/video/2012/09/S1E11---Tutto-cio-che-luccica-0cab3323-732e-45d6-8e86-7704acab6598.html', @@ -373,7 +373,7 @@ class RaiPlayIE(RaiBaseIE): 'episode_number': int_or_none(media.get('episode')), 'subtitles': self._extract_subtitles(url, video), 'release_year': int_or_none(traverse_obj(media, ('track_info', 'edit_year'))), - **relinker_info + **relinker_info, } @@ -596,7 +596,7 @@ class RaiIE(RaiBaseIE): 'upload_date': '20140612', }, 'params': {'skip_download': True}, - 'expected_warnings': ['Video not available. Likely due to geo-restriction.'] + 'expected_warnings': ['Video not available. Likely due to geo-restriction.'], }, { 'url': 'https://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html', 'info_dict': { @@ -606,7 +606,7 @@ class RaiIE(RaiBaseIE): 'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 2214, - 'upload_date': '20161103' + 'upload_date': '20161103', }, 'params': {'skip_download': True}, }, { @@ -632,7 +632,7 @@ class RaiIE(RaiBaseIE): 'ext': media.get('formatoAudio'), 'vcodec': 'none', 'acodec': media.get('formatoAudio'), - }] + }], } elif 'Video' in media['type']: relinker_info = self._extract_relinker_info(media['mediaUri'], content_id) @@ -652,7 +652,7 @@ class RaiIE(RaiBaseIE): 'upload_date': unified_strdate(media.get('date')), 'duration': parse_duration(media.get('length')), 'subtitles': self._extract_subtitles(url, media), - **relinker_info + **relinker_info, } @@ -721,7 +721,7 @@ class RaiNewsIE(RaiBaseIE): 'title': player_data.get('title') or track_info.get('title') or self._og_search_title(webpage), 'upload_date': unified_strdate(track_info.get('date')), 'uploader': strip_or_none(track_info.get('editor') or None), - **relinker_info + **relinker_info, } diff --git a/yt_dlp/extractor/raywenderlich.py b/yt_dlp/extractor/raywenderlich.py index 
e0e3c3e..3e74fd8 100644 --- a/yt_dlp/extractor/raywenderlich.py +++ b/yt_dlp/extractor/raywenderlich.py @@ -2,7 +2,6 @@ import re from .common import InfoExtractor from .vimeo import VimeoIE -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ -67,12 +66,12 @@ class RayWenderlichIE(InfoExtractor): continue video_id = content.get('identifier') if video_id: - return compat_str(video_id) + return str(video_id) def _real_extract(self, url): mobj = self._match_valid_url(url) course_id, lesson_id = mobj.group('course_id', 'id') - display_id = '%s/%s' % (course_id, lesson_id) + display_id = f'{course_id}/{lesson_id}' webpage = self._download_webpage(url, display_id) @@ -110,8 +109,8 @@ class RayWenderlichIE(InfoExtractor): if csrf_token: headers['X-CSRF-Token'] = csrf_token video = self._download_json( - 'https://videos.raywenderlich.com/api/v1/videos/%s.json' - % video_id, display_id, headers=headers)['video'] + f'https://videos.raywenderlich.com/api/v1/videos/{video_id}.json', + display_id, headers=headers)['video'] vimeo_id = video['clips'][0]['provider_id'] info.update({ '_type': 'url_transparent', @@ -124,7 +123,7 @@ class RayWenderlichIE(InfoExtractor): return merge_dicts(info, self.url_result( VimeoIE._smuggle_referrer( - 'https://player.vimeo.com/video/%s' % vimeo_id, url), + f'https://player.vimeo.com/video/{vimeo_id}', url), ie=VimeoIE.ie_key(), video_id=vimeo_id)) @@ -152,8 +151,7 @@ class RayWenderlichCourseIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if RayWenderlichIE.suitable(url) else super( - RayWenderlichCourseIE, cls).suitable(url) + return False if RayWenderlichIE.suitable(url) else super().suitable(url) def _real_extract(self, url): course_id = self._match_id(url) @@ -163,7 +161,7 @@ class RayWenderlichCourseIE(InfoExtractor): entries = [] lesson_urls = set() for lesson_url in re.findall( - r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage): + 
rf'<a[^>]+\bhref=["\'](/{course_id}/lessons/\d+)', webpage): if lesson_url in lesson_urls: continue lesson_urls.add(lesson_url) diff --git a/yt_dlp/extractor/rbgtum.py b/yt_dlp/extractor/rbgtum.py index 5f2d0c1..5bb4655 100644 --- a/yt_dlp/extractor/rbgtum.py +++ b/yt_dlp/extractor/rbgtum.py @@ -15,7 +15,7 @@ class RbgTumIE(InfoExtractor): 'ext': 'mp4', 'title': 'Lecture: October 18. 2022', 'series': 'Concepts of C++ programming (IN2377)', - } + }, }, { # Presentation only 'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES', @@ -25,7 +25,7 @@ class RbgTumIE(InfoExtractor): 'ext': 'mp4', 'title': 'Lecture 3: Introduction to Neural Networks', 'series': 'Introduction to Deep Learning (IN2346)', - } + }, }, { # Camera only 'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM', @@ -35,11 +35,11 @@ class RbgTumIE(InfoExtractor): 'ext': 'mp4', 'title': 'Fachschaftsvollversammlung', 'series': 'Fachschaftsvollversammlung Informatik', - } + }, }, { 'url': 'https://tum.live/w/linalginfo/27102', 'only_matching': True, - }, ] + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -84,7 +84,7 @@ class RbgTumCourseIE(InfoExtractor): }, { 'url': 'https://tum.live/old/course/2023/S/linalginfo', 'only_matching': True, - }, ] + }] def _real_extract(self, url): course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug') diff --git a/yt_dlp/extractor/rcs.py b/yt_dlp/extractor/rcs.py index b865f63..1925afb 100644 --- a/yt_dlp/extractor/rcs.py +++ b/yt_dlp/extractor/rcs.py @@ -64,7 +64,7 @@ class RCSBaseIE(InfoExtractor): 'media2-doveviaggi-it.akamaized': 'viaggi', 'media2-vivimilano-corriere-it.akamaized': 'vivimilano', 'vivimilano-vh.akamaihd': 'vivimilano', - 'media2-youreporter-it.akamaized': 'youreporter' + 'media2-youreporter-it.akamaized': 'youreporter', } def _get_video_src(self, video): @@ -97,7 +97,7 @@ class RCSBaseIE(InfoExtractor): yield { 'type': type_, 'url': url, - 'bitrate': source.get('bitrate') + 
'bitrate': source.get('bitrate'), } def _create_http_formats(self, m3u8_formats, video_id): @@ -185,7 +185,7 @@ class RCSBaseIE(InfoExtractor): return { '_type': 'url_transparent', 'url': emb, - 'ie_key': RCSEmbedsIE.ie_key() + 'ie_key': RCSEmbedsIE.ie_key(), } if not video_data: @@ -236,13 +236,13 @@ class RCSEmbedsIE(RCSBaseIE): 'title': 'Sky Arte racconta Madonna nella serie "Artist to icon"', 'description': 'md5:65b09633df9ffee57f48b39e34c9e067', 'uploader': 'rcs.it', - } + }, }, { 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140', - 'only_matching': True + 'only_matching': True, }] _WEBPAGE_TESTS = [{ 'url': 'https://www.iodonna.it/video-iodonna/personaggi-video/monica-bellucci-piu-del-lavoro-oggi-per-me-sono-importanti-lamicizia-e-la-famiglia/', @@ -252,7 +252,7 @@ class RCSEmbedsIE(RCSBaseIE): 'title': 'Monica Bellucci: «Più del lavoro, oggi per me sono importanti l\'amicizia e la famiglia»', 'description': 'md5:daea6d9837351e56b1ab615c06bebac1', 'uploader': 'rcs.it', - } + }, }] @staticmethod @@ -286,7 +286,7 @@ class RCSIE(RCSBaseIE): 'title': 'Vettel guida la Ferrari SF90 al Mugello e al suo fianco c\'è Leclerc (bendato): il video è esilarante', 'description': 'md5:3915ce5ebb3d2571deb69a5eb85ac9b5', 'uploader': 'Corriere Tv', - } + }, }, { # search for video id inside the page 'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/', @@ -298,7 +298,7 @@ class RCSIE(RCSBaseIE): 'title': 'La nuova spettacolare attrazione in Norvegia: il ponte sopra Vøringsfossen', 'description': 'md5:18b35a291f6746c0c8dacd16e5f5f4f8', 'uploader': 'DOVE Viaggi', - } + }, }, { # only audio format https://github.com/yt-dlp/yt-dlp/issues/5683 'url': 
'https://video.corriere.it/cronaca/audio-telefonata-il-papa-becciu-santita-lettera-che-mi-ha-inviato-condanna/b94c0d20-70c2-11ed-9572-e4b947a0ebd2', @@ -310,7 +310,7 @@ class RCSIE(RCSBaseIE): 'description': 'md5:c0ddb61bd94a8d4e0d4bb9cda50a689b', 'uploader': 'Corriere Tv', 'formats': [{'format_id': 'https-mp3', 'ext': 'mp3'}], - } + }, }, { # old content still needs cdn migration 'url': 'https://viaggi.corriere.it/video/milano-varallo-sesia-sul-treno-a-vapore/', @@ -322,10 +322,10 @@ class RCSIE(RCSBaseIE): 'title': 'Milano-Varallo Sesia sul treno a vapore', 'description': 'md5:6348f47aac230397fe341a74f7678d53', 'uploader': 'DOVE Viaggi', - } + }, }, { 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945', - 'only_matching': True + 'only_matching': True, }] @@ -346,7 +346,7 @@ class RCSVariousIE(RCSBaseIE): 'title': 'Cervicalgia e mal di testa, il video con i suggerimenti dell\'esperto', 'description': 'md5:ae21418f34cee0b8d02a487f55bcabb5', 'uploader': 'leitv.it', - } + }, }, { 'url': 'https://www.youreporter.it/fiume-sesia-3-ottobre-2020/', 'md5': '3989b6d603482611a2abd2f32b79f739', @@ -357,7 +357,7 @@ class RCSVariousIE(RCSBaseIE): 'title': 'Fiume Sesia 3 ottobre 2020', 'description': 'md5:0070eef1cc884d13c970a4125063de55', 'uploader': 'youreporter.it', - } + }, }, { 'url': 'https://www.amica.it/video-post/saint-omer-al-cinema-il-film-leone-dargento-che-ribalta-gli-stereotipi/', 'md5': '187cce524dfd0343c95646c047375fc4', @@ -368,5 +368,5 @@ class RCSVariousIE(RCSBaseIE): 'title': '"Saint Omer": al cinema il film Leone d\'argento che ribalta gli stereotipi', 'description': 'md5:b1c8869c2dcfd6073a2a311ba0008aa8', 'uploader': 'rcs.it', - } + }, }] diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py index 9c382e2..61b73a5 100644 --- a/yt_dlp/extractor/rcti.py +++ b/yt_dlp/extractor/rcti.py @@ -154,34 +154,34 @@ class RCTIPlusIE(RCTIPlusBaseIE): is_upcoming = try_get(video_json, lambda x: 
x['current_date'] < x['start_date']) if is_upcoming: self.raise_no_formats( - 'This event will start at %s.' % video_json['live_label'] if video_json.get('live_label') else 'This event has not started yet.', expected=True) + 'This event will start at {}.'.format(video_json['live_label']) if video_json.get('live_label') else 'This event has not started yet.', expected=True) if 'akamaized' in video_url: # For some videos hosted on Akamai's CDN (possibly AES-encrypted ones?), a session needs to at least be made via Conviva's API conviva_json_data = { **self._CONVIVA_JSON_TEMPLATE, 'url': video_url, - 'sst': int(time.time()) + 'sst': int(time.time()), } conviva_json_res = self._download_json( 'https://ff84ae928c3b33064b76dec08f12500465e59a6f.cws.conviva.com/0/wsg', display_id, 'Creating Conviva session', 'Failed to create Conviva session', - fatal=False, data=json.dumps(conviva_json_data).encode('utf-8')) + fatal=False, data=json.dumps(conviva_json_data).encode()) if conviva_json_res and conviva_json_res.get('err') != 'ok': - self.report_warning('Conviva said: %s' % str(conviva_json_res.get('err'))) + self.report_warning('Conviva said: {}'.format(str(conviva_json_res.get('err')))) video_meta, meta_paths = self._call_api( - 'https://api.rctiplus.com/api/v1/%s/%s' % (video_type, video_id), display_id, 'Downloading video metadata') + f'https://api.rctiplus.com/api/v1/{video_type}/{video_id}', display_id, 'Downloading video metadata') thumbnails, image_path = [], meta_paths.get('image_path', 'https://rstatic.akamaized.net/media/') if video_meta.get('portrait_image'): thumbnails.append({ 'id': 'portrait_image', - 'url': '%s%d%s' % (image_path, 2000, video_meta['portrait_image']) # 2000px seems to be the highest resolution that can be given + 'url': '{}{}{}'.format(image_path, 2000, video_meta['portrait_image']), # 2000px seems to be the highest resolution that can be given }) if video_meta.get('landscape_image'): thumbnails.append({ 'id': 'landscape_image', - 'url': 
'%s%d%s' % (image_path, 2000, video_meta['landscape_image']) + 'url': '{}{}{}'.format(image_path, 2000, video_meta['landscape_image']), }) try: formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'}) @@ -241,7 +241,7 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE): 'age_limit': 2, 'tags': 'count:11', 'display_id': 'inews-pagi', - } + }, }] _AGE_RATINGS = { # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings 'S-SU': 2, @@ -255,13 +255,13 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE): @classmethod def suitable(cls, url): - return False if RCTIPlusIE.suitable(url) else super(RCTIPlusSeriesIE, cls).suitable(url) + return False if RCTIPlusIE.suitable(url) else super().suitable(url) def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata={}): total_pages = 0 try: total_pages = self._call_api( - '%s&length=20&page=0' % url, + f'{url}&length=20&page=0', display_id, note)[1]['pagination']['total_page'] except ExtractorError as e: if 'not found' in str(e): @@ -272,8 +272,8 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE): for page_num in range(1, total_pages + 1): episode_list = self._call_api( - '%s&length=20&page=%s' % (url, page_num), - display_id, '%s page %s' % (note, page_num))[0] or [] + f'{url}&length=20&page={page_num}', + display_id, f'{note} page {page_num}')[0] or [] for video_json in episode_list: yield { @@ -288,7 +288,7 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE): 'duration': video_json.get('duration'), 'season_number': video_json.get('season'), 'episode_number': video_json.get('episode'), - **metadata + **metadata, } def _series_entries(self, series_id, display_id=None, video_type=None, metadata={}): @@ -348,7 +348,7 @@ class RCTIPlusTVIE(RCTIPlusBaseIE): }, 'params': { 'skip_download': True, - } + }, }, { # Returned video will always change 'url': 'https://www.rctiplus.com/live-event', @@ -361,7 +361,7 @@ class RCTIPlusTVIE(RCTIPlusBaseIE): 
@classmethod def suitable(cls, url): - return False if RCTIPlusIE.suitable(url) else super(RCTIPlusTVIE, cls).suitable(url) + return False if RCTIPlusIE.suitable(url) else super().suitable(url) def _real_extract(self, url): match = self._match_valid_url(url).groupdict() diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py index cc76b89..975b091 100644 --- a/yt_dlp/extractor/rds.py +++ b/yt_dlp/extractor/rds.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( js_to_json, parse_duration, @@ -24,7 +23,7 @@ class RDSIE(InfoExtractor): 'timestamp': 1606129030, 'upload_date': '20201123', 'duration': 773.039, - } + }, }, { 'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934', 'only_matching': True, @@ -36,7 +35,7 @@ class RDSIE(InfoExtractor): webpage = self._download_webpage(url, display_id) item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json) - video_id = compat_str(item['id']) + video_id = str(item['id']) title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta( 'title', webpage, 'title', fatal=True) description = self._og_search_description(webpage) or self._html_search_meta( @@ -57,7 +56,7 @@ class RDSIE(InfoExtractor): '_type': 'url_transparent', 'id': video_id, 'display_id': display_id, - 'url': '9c9media:rds_web:%s' % video_id, + 'url': f'9c9media:rds_web:{video_id}', 'title': title, 'description': description, 'thumbnail': thumbnail, diff --git a/yt_dlp/extractor/redbee.py b/yt_dlp/extractor/redbee.py index 4d71133..d43bb0b 100644 --- a/yt_dlp/extractor/redbee.py +++ b/yt_dlp/extractor/redbee.py @@ -41,8 +41,8 @@ class RedBeeBaseIE(InfoExtractor): return self._download_json( f'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}', - asset_id, data=json.dumps(request).encode('utf-8'), headers={ - 'Content-Type': 'application/json;charset=utf-8' + asset_id, 
data=json.dumps(request).encode(), headers={ + 'Content-Type': 'application/json;charset=utf-8', })['sessionToken'] def _get_formats_and_subtitles(self, asset_id, **kwargs): @@ -51,26 +51,26 @@ class RedBeeBaseIE(InfoExtractor): f'{self._API_URL}/entitlement/{asset_id}/play', asset_id, headers={ 'Authorization': f'Bearer {bearer_token}', - 'Accept': 'application/json, text/plain, */*' + 'Accept': 'application/json, text/plain, */*', }) formats, subtitles = [], {} - for format in api_response['formats']: - if not format.get('mediaLocator'): + for format_data in api_response['formats']: + if not format_data.get('mediaLocator'): continue fmts, subs = [], {} - if format.get('format') == 'DASH': + if format_data.get('format') == 'DASH': fmts, subs = self._extract_mpd_formats_and_subtitles( - format['mediaLocator'], asset_id, fatal=False) - elif format.get('format') == 'SMOOTHSTREAMING': + format_data['mediaLocator'], asset_id, fatal=False) + elif format_data.get('format') == 'SMOOTHSTREAMING': fmts, subs = self._extract_ism_formats_and_subtitles( - format['mediaLocator'], asset_id, fatal=False) - elif format.get('format') == 'HLS': + format_data['mediaLocator'], asset_id, fatal=False) + elif format_data.get('format') == 'HLS': fmts, subs = self._extract_m3u8_formats_and_subtitles( - format['mediaLocator'], asset_id, fatal=False) + format_data['mediaLocator'], asset_id, fatal=False) - if format.get('drm'): + if format_data.get('drm'): for f in fmts: f['has_drm'] = True @@ -240,12 +240,12 @@ class RTBFIE(RedBeeBaseIE): 'APIKey': self._GIGYA_API_KEY, 'targetEnv': 'jssdk', 'sessionExpiration': '-2', - }).encode('utf-8'), headers={ + }).encode(), headers={ 'Content-Type': 'application/x-www-form-urlencoded', }) if login_response['statusCode'] != 200: - raise ExtractorError('Login failed. Server message: %s' % login_response['errorMessage'], expected=True) + raise ExtractorError('Login failed. 
Server message: {}'.format(login_response['errorMessage']), expected=True) self._set_cookie('.rtbf.be', self._LOGIN_COOKIE_ID, login_response['sessionInfo']['login_token'], secure=True, expire_time=time.time() + 3600) @@ -287,7 +287,7 @@ class RTBFIE(RedBeeBaseIE): error = data.get('error') if error: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {error}', expected=True) provider = data.get('provider') if provider in self._PROVIDERS: diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py index fac51b9..ceeef52 100644 --- a/yt_dlp/extractor/redbulltv.py +++ b/yt_dlp/extractor/redbulltv.py @@ -57,7 +57,7 @@ class RedBullTVIE(InfoExtractor): 'os_family': 'http', }) if session.get('code') == 'error': - raise ExtractorError('%s said: %s' % ( + raise ExtractorError('{} said: {}'.format( self.IE_NAME, session['message'])) token = session['token'] @@ -65,20 +65,19 @@ class RedBullTVIE(InfoExtractor): video = self._download_json( 'https://api.redbull.tv/v3/products/' + video_id, video_id, note='Downloading video information', - headers={'Authorization': token} + headers={'Authorization': token}, ) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 404: error_message = self._parse_json( e.cause.response.read().decode(), video_id)['error'] - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, error_message), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True) raise title = video['title'].strip() formats, subtitles = self._extract_m3u8_formats_and_subtitles( - 'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token), + f'https://dms.redbull.tv/v3/{video_id}/{token}/playlist.m3u8', video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') for resource in video.get('resources', []): @@ -86,13 +85,13 @@ class RedBullTVIE(InfoExtractor): splitted_resource = resource.split('_') if 
splitted_resource[2]: subtitles.setdefault('en', []).append({ - 'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource), + 'url': f'https://resources.redbull.tv/{video_id}/{resource}', 'ext': splitted_resource[2], }) subheading = video.get('subheading') if subheading: - title += ' - %s' % subheading + title += f' - {subheading}' return { 'id': video_id, @@ -135,7 +134,7 @@ class RedBullEmbedIE(RedBullTVIE): # XXX: Do not subclass from concrete IE %s %s } -}''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'), +}''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'), # noqa: UP031 })['data']['resource']['videoEssence']['attributes']['assetId'] return self.extract_info(asset_id) @@ -155,7 +154,7 @@ class RedBullTVRrnContentIE(InfoExtractor): def _real_extract(self, url): region, lang, rrn_id = self._match_valid_url(url).groups() - rrn_id += ':%s-%s' % (lang, region.upper()) + rrn_id += f':{lang}-{region.upper()}' return self.url_result( 'https://www.redbull.com/embed/' + rrn_id, RedBullEmbedIE.ie_key(), rrn_id) @@ -209,7 +208,7 @@ class RedBullIE(InfoExtractor): regions.append('LAT') if lang in self._INT_FALLBACK_LIST: regions.append('INT') - locale = '>'.join(['%s-%s' % (lang, reg) for reg in regions]) + locale = '>'.join([f'{lang}-{reg}' for reg in regions]) rrn_id = self._download_json( 'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale, diff --git a/yt_dlp/extractor/redge.py b/yt_dlp/extractor/redge.py index 875d6f8..7cb91ee 100644 --- a/yt_dlp/extractor/redge.py +++ b/yt_dlp/extractor/redge.py @@ -51,14 +51,14 @@ class RedCDNLivxIE(InfoExtractor): 'only_matching': True, }] - """ + ''' Known methods (first in url path): - `livedash` - DASH MPD - `livehls` - HTTP Live Streaming - `livess` - IIS Smooth Streaming - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac - `sc` - shoutcast/icecast (audio streams, like radio) - """ + ''' def 
_real_extract(self, url): tenant, path = self._match_valid_url(url).group('tenant', 'id') diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index d0546bb..50138ab 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -1,7 +1,7 @@ import functools +import urllib.parse from .common import InfoExtractor -from ..compat import compat_parse_qs from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -71,7 +71,7 @@ class RedGifsBaseInfoExtractor(InfoExtractor): raise ExtractorError('Unable to get temporary token') self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}' - def _call_api(self, ep, video_id, *args, **kwargs): + def _call_api(self, ep, video_id, **kwargs): for first_attempt in True, False: if 'authorization' not in self._API_HEADERS: self._fetch_oauth_token(video_id) @@ -79,7 +79,7 @@ class RedGifsBaseInfoExtractor(InfoExtractor): headers = dict(self._API_HEADERS) headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}' data = self._download_json( - f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, *args, **kwargs) + f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, **kwargs) break except ExtractorError as e: if first_attempt and isinstance(e.cause, HTTPError) and e.cause.status == 401: @@ -130,7 +130,7 @@ class RedGifsIE(RedGifsBaseInfoExtractor): 'categories': list, 'age_limit': 18, 'tags': list, - } + }, }, { 'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0', 'info_dict': { @@ -146,7 +146,7 @@ class RedGifsIE(RedGifsBaseInfoExtractor): 'categories': list, 'age_limit': 18, 'tags': list, - } + }, }] def _real_extract(self, url): @@ -166,7 +166,7 @@ class RedGifsSearchIE(RedGifsBaseInfoExtractor): 'info_dict': { 'id': 'tags=Lesbian', 'title': 'Lesbian', - 'description': 'RedGifs search for Lesbian, ordered by trending' + 'description': 'RedGifs search for Lesbian, ordered by trending', }, 'playlist_mincount': 100, 
}, @@ -175,7 +175,7 @@ class RedGifsSearchIE(RedGifsBaseInfoExtractor): 'info_dict': { 'id': 'type=g&order=latest&tags=Lesbian', 'title': 'Lesbian', - 'description': 'RedGifs search for Lesbian, ordered by latest' + 'description': 'RedGifs search for Lesbian, ordered by latest', }, 'playlist_mincount': 100, }, @@ -184,15 +184,15 @@ class RedGifsSearchIE(RedGifsBaseInfoExtractor): 'info_dict': { 'id': 'type=g&order=latest&tags=Lesbian&page=2', 'title': 'Lesbian', - 'description': 'RedGifs search for Lesbian, ordered by latest' + 'description': 'RedGifs search for Lesbian, ordered by latest', }, 'playlist_count': 80, - } + }, ] def _real_extract(self, url): query_str = self._match_valid_url(url).group('query') - query = compat_parse_qs(query_str) + query = urllib.parse.parse_qs(query_str) if not query.get('tags'): raise ExtractorError('Invalid query tags', expected=True) @@ -220,7 +220,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor): 'info_dict': { 'id': 'lamsinka89', 'title': 'lamsinka89', - 'description': 'RedGifs user lamsinka89, ordered by recent' + 'description': 'RedGifs user lamsinka89, ordered by recent', }, 'playlist_mincount': 100, }, @@ -229,7 +229,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor): 'info_dict': { 'id': 'lamsinka89?page=3', 'title': 'lamsinka89', - 'description': 'RedGifs user lamsinka89, ordered by recent' + 'description': 'RedGifs user lamsinka89, ordered by recent', }, 'playlist_count': 30, }, @@ -238,17 +238,17 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor): 'info_dict': { 'id': 'lamsinka89?order=best&type=g', 'title': 'lamsinka89', - 'description': 'RedGifs user lamsinka89, ordered by best' + 'description': 'RedGifs user lamsinka89, ordered by best', }, 'playlist_mincount': 100, - } + }, ] def _real_extract(self, url): username, query_str = self._match_valid_url(url).group('username', 'query') playlist_id = f'{username}?{query_str}' if query_str else username - query = compat_parse_qs(query_str) + query = 
urllib.parse.parse_qs(query_str) order = query.get('order', ('recent',))[0] entries = self._paged_entries(f'users/{username}/search', playlist_id, query, { diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py index 14ed0ed..94a9054 100644 --- a/yt_dlp/extractor/redtube.py +++ b/yt_dlp/extractor/redtube.py @@ -53,14 +53,14 @@ class RedTubeIE(InfoExtractor): for patterns, message in ERRORS: if any(p in webpage for p in patterns): raise ExtractorError( - 'Video %s %s' % (video_id, message), expected=True) + f'Video {video_id} {message}', expected=True) info = self._search_json_ld(webpage, video_id, default={}) if not info.get('title'): info['title'] = self._html_search_regex( (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', - r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), + r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), webpage, 'title', group='title', default=None) or self._og_search_title(webpage) diff --git a/yt_dlp/extractor/rentv.py b/yt_dlp/extractor/rentv.py index abb537c..aed4380 100644 --- a/yt_dlp/extractor/rentv.py +++ b/yt_dlp/extractor/rentv.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_ext, int_or_none, @@ -19,7 +18,7 @@ class RENTVIE(InfoExtractor): 'title': 'Документальный спецпроект: "Промывка мозгов. 
Технологии XXI века"', 'timestamp': 1472230800, 'upload_date': '20160826', - } + }, }, { 'url': 'http://ren.tv/player/118577', 'only_matching': True, @@ -70,7 +69,7 @@ class RENTVArticleIE(InfoExtractor): 'ext': 'mp4', 'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла', 'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.', - } + }, }, { # TODO: invalid m3u8 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video', @@ -99,6 +98,6 @@ class RENTVArticleIE(InfoExtractor): media_id = config_profile.get('mediaid') if not media_id: continue - media_id = compat_str(media_id) + media_id = str(media_id) entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id)) return self.playlist_result(entries, display_id) diff --git a/yt_dlp/extractor/restudy.py b/yt_dlp/extractor/restudy.py index f49262a..e10811b 100644 --- a/yt_dlp/extractor/restudy.py +++ b/yt_dlp/extractor/restudy.py @@ -15,7 +15,7 @@ class RestudyIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637', 'only_matching': True, @@ -30,7 +30,7 @@ class RestudyIE(InfoExtractor): description = self._og_search_description(webpage).strip() formats = self._extract_smil_formats( - 'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id, + f'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_{video_id}.xml', video_id) return { diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py index 9c9bac6..69a858a 100644 --- a/yt_dlp/extractor/reuters.py +++ b/yt_dlp/extractor/reuters.py @@ -18,25 +18,25 @@ class ReutersIE(InfoExtractor): 'id': '368575562', 'ext': 'mp4', 'title': 'San Francisco police chief resigns', - } + }, } def _real_extract(self, url): video_id = self._match_id(url) 
webpage = self._download_webpage( - 'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id) + f'http://www.reuters.com/assets/iframe/yovideo?videoId={video_id}', video_id) video_data = js_to_json(self._search_regex( r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);', webpage, 'video data')) def get_json_value(key, fatal=False): - return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal) + return self._search_regex(rf'"{key}"\s*:\s*"([^"]+)"', video_data, key, fatal=fatal) title = unescapeHTML(get_json_value('title', fatal=True)) mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups() mas_data = self._download_json( - 'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid), + f'http://mas-e.cds1.yospace.com/mas/{mmid}/{fid}?trans=json', video_id, transform_source=js_to_json) formats = [] for f in mas_data: diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py index 06b6c3c..ddf8c37 100644 --- a/yt_dlp/extractor/reverbnation.py +++ b/yt_dlp/extractor/reverbnation.py @@ -24,9 +24,9 @@ class ReverbNationIE(InfoExtractor): song_id = self._match_id(url) api_res = self._download_json( - 'https://api.reverbnation.com/song/%s' % song_id, + f'https://api.reverbnation.com/song/{song_id}', song_id, - note='Downloading information of song %s' % song_id + note=f'Downloading information of song {song_id}', ) THUMBNAILS = ('thumbnail', 'image') @@ -36,7 +36,7 @@ class ReverbNationIE(InfoExtractor): if api_res.get(thumb_key): thumbnails.append({ 'url': api_res[thumb_key], - 'preference': quality(thumb_key) + 'preference': quality(thumb_key), }) return { diff --git a/yt_dlp/extractor/ridehome.py b/yt_dlp/extractor/ridehome.py index 78f838a..58722bb 100644 --- a/yt_dlp/extractor/ridehome.py +++ b/yt_dlp/extractor/ridehome.py @@ -55,7 +55,7 @@ class RideHomeIE(InfoExtractor): 'release_date': '20220108', 'release_timestamp': 1641672000, 'duration': 2789.38122, - 
'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$' + 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$', }, }], }, { @@ -81,7 +81,7 @@ class RideHomeIE(InfoExtractor): 'release_date': '20211026', 'release_timestamp': 1635272124, 'duration': 2266.30531, - 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$' + 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$', }, }], }] diff --git a/yt_dlp/extractor/rinsefm.py b/yt_dlp/extractor/rinsefm.py index f87b895..5bc2eb8 100644 --- a/yt_dlp/extractor/rinsefm.py +++ b/yt_dlp/extractor/rinsefm.py @@ -39,8 +39,8 @@ class RinseFMIE(RinseFMBaseIE): 'title': 'Club Glow - 15/12/2023 - 20:00', 'thumbnail': r're:^https://.+\.(?:jpg|JPG)$', 'release_timestamp': 1702598400, - 'release_date': '20231215' - } + 'release_date': '20231215', + }, }] def _real_extract(self, url): @@ -58,22 +58,22 @@ class RinseFMArtistPlaylistIE(RinseFMBaseIE): 'info_dict': { 'id': 'resources', 'title': '[re]sources', - 'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.' + 'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.', }, - 'playlist_mincount': 40 + 'playlist_mincount': 40, }, { 'url': 'https://rinse.fm/shows/ivy/', 'info_dict': { 'id': 'ivy', 'title': '[IVY]', - 'description': 'A dedicated space for DNB/Turbo House and 4x4.' 
+ 'description': 'A dedicated space for DNB/Turbo House and 4x4.', }, - 'playlist_mincount': 7 + 'playlist_mincount': 7, }] def _entries(self, data): for episode in traverse_obj(data, ( - 'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio) + 'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio), ): yield self._parse_entry(episode) diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py index bc59ed0..e7b1e22 100644 --- a/yt_dlp/extractor/rmcdecouverte.py +++ b/yt_dlp/extractor/rmcdecouverte.py @@ -1,9 +1,7 @@ +import urllib.parse + from .brightcove import BrightcoveLegacyIE from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) from ..utils import smuggle_url @@ -59,7 +57,7 @@ class RMCDecouverteIE(InfoExtractor): webpage = self._download_webpage(url, display_id) brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) if brightcove_legacy_url: - brightcove_id = compat_parse_qs(compat_urlparse.urlparse( + brightcove_id = urllib.parse.parse_qs(urllib.parse.urlparse( brightcove_legacy_url).query)['@videoPlayer'][0] else: brightcove_id = self._search_regex( diff --git a/yt_dlp/extractor/rockstargames.py b/yt_dlp/extractor/rockstargames.py index 1662243..02abd48 100644 --- a/yt_dlp/extractor/rockstargames.py +++ b/yt_dlp/extractor/rockstargames.py @@ -19,7 +19,7 @@ class RockstarGamesIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1464876000, 'upload_date': '20160602', - } + }, }, { 'url': 'http://www.rockstargames.com/videos#/?video=48', 'only_matching': True, diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py index 3bc5f3c..be17465 100644 --- a/yt_dlp/extractor/rokfin.py +++ b/yt_dlp/extractor/rokfin.py @@ -45,7 +45,7 @@ class RokfinIE(InfoExtractor): 'dislike_count': int, 'like_count': int, 'duration': 213, - } + }, }, { 'url': 
'https://rokfin.com/post/223/Julian-Assange-Arrested-Streaming-In-Real-Time', 'info_dict': { @@ -63,7 +63,7 @@ class RokfinIE(InfoExtractor): 'dislike_count': int, 'like_count': int, 'tags': ['FreeThinkingMedia^', 'RealProgressives^'], - } + }, }, { 'url': 'https://www.rokfin.com/stream/10543/Its-A-Crazy-Mess-Regional-Director-Blows-Whistle-On-Pfizers-Vaccine-Trial-Data', 'info_dict': { @@ -86,7 +86,7 @@ class RokfinIE(InfoExtractor): 'dislike_count': int, 'like_count': int, 'tags': ['FreeThinkingMedia^'], - } + }, }, { 'url': 'https://rokfin.com/post/126703/Brave-New-World--Aldous-Huxley-DEEPDIVE--Chpts-13--Quite-Frankly--Jay-Dyer', 'info_dict': { @@ -106,7 +106,7 @@ class RokfinIE(InfoExtractor): 'tags': ['FreeThinkingMedia^', 'OpenMind^'], 'description': 'md5:cb04e32e68326c9b2b251b297bacff35', 'duration': 3100, - } + }, }, { 'url': 'https://rokfin.com/stream/31332/The-Grayzone-live-on-Nordstream-blame-game', 'info_dict': { @@ -126,7 +126,7 @@ class RokfinIE(InfoExtractor): 'release_date': '20230310', 'upload_date': '20230310', 'tags': ['FreeThinkingMedia^'], - } + }, }] def _real_extract(self, url): @@ -203,7 +203,7 @@ class RokfinIE(InfoExtractor): 'parent': 'root', 'like_count': int_or_none(comment.get('numLikes')), 'dislike_count': int_or_none(comment.get('numDislikes')), - 'timestamp': unified_timestamp(comment.get('postedAt')) + 'timestamp': unified_timestamp(comment.get('postedAt')), } pages_total = int_or_none(raw_comments.get('totalPages')) or None @@ -246,7 +246,7 @@ class RokfinIE(InfoExtractor): 'code': urllib.parse.parse_qs(urllib.parse.urldefrag(urlh.url).fragment).get('code')[0], 'client_id': 'web', 'grant_type': 'authorization_code', - 'redirect_uri': 'https://rokfin.com/silent-check-sso.html' + 'redirect_uri': 'https://rokfin.com/silent-check-sso.html', })) def _authentication_active(self): @@ -276,7 +276,7 @@ class RokfinIE(InfoExtractor): data=urlencode_postdata({ 'grant_type': 'refresh_token', 'refresh_token': refresh_token, - 'client_id': 
'web' + 'client_id': 'web', })) headers['authorization'] = self._get_auth_token() if headers['authorization'] is None: @@ -399,7 +399,7 @@ class RokfinSearchIE(SearchInfoExtractor): 'info_dict': { 'id': '"zelenko"', 'title': '"zelenko"', - } + }, }] _db_url = None _db_access_key = None diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py index 5c62239..8b0099e 100644 --- a/yt_dlp/extractor/roosterteeth.py +++ b/yt_dlp/extractor/roosterteeth.py @@ -83,7 +83,7 @@ class RoosterTeethBaseIE(InfoExtractor): 'availability': self._availability( needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only, is_private=False, is_unlisted=False), - 'tags': attributes.get('genres') + 'tags': attributes.get('genres'), } @@ -254,7 +254,7 @@ class RoosterTeethIE(RoosterTeethBaseIE): if isinstance(e.cause, HTTPError) and e.cause.status == 403: if self._parse_json(e.cause.response.read().decode(), display_id).get('access') is False: self.raise_login_required( - '%s is only available for FIRST members' % display_id) + f'{display_id} is only available for FIRST members') raise # XXX: additional ad-free URL at video_data['links']['download'] but often gives 403 errors @@ -281,7 +281,7 @@ class RoosterTeethIE(RoosterTeethBaseIE): 'display_id': display_id, 'formats': formats, 'subtitles': subtitles, - **self._extract_video_info(episode) + **self._extract_video_info(episode), } diff --git a/yt_dlp/extractor/rottentomatoes.py b/yt_dlp/extractor/rottentomatoes.py index e357175..c229a1c 100644 --- a/yt_dlp/extractor/rottentomatoes.py +++ b/yt_dlp/extractor/rottentomatoes.py @@ -19,7 +19,7 @@ class RottenTomatoesIE(InfoExtractor): 'id': '11028566', 'ext': 'mp4', 'title': 'Toy Story 3', - 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.' 
+ 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', }, 'skip': 'No longer available', }, { @@ -30,7 +30,7 @@ class RottenTomatoesIE(InfoExtractor): 'title': 'Toy Story 3: Trailer 2', 'description': '', 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 149.941 + 'duration': 149.941, }, }, { 'url': 'http://www.rottentomatoes.com/m/toy_story_3', diff --git a/yt_dlp/extractor/rozhlas.py b/yt_dlp/extractor/rozhlas.py index 411a625..4a10720 100644 --- a/yt_dlp/extractor/rozhlas.py +++ b/yt_dlp/extractor/rozhlas.py @@ -23,8 +23,8 @@ class RozhlasIE(InfoExtractor): 'id': '3421320', 'ext': 'mp3', 'title': 'Echo Pavla Klusáka (30.06.2015 21:00)', - 'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let' - } + 'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let', + }, }, { 'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed', 'only_matching': True, @@ -34,7 +34,7 @@ class RozhlasIE(InfoExtractor): audio_id = self._match_id(url) webpage = self._download_webpage( - 'http://prehravac.rozhlas.cz/audio/%s' % audio_id, audio_id) + f'http://prehravac.rozhlas.cz/audio/{audio_id}', audio_id) title = self._html_search_regex( r'<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track', @@ -48,7 +48,7 @@ class RozhlasIE(InfoExtractor): return { 'id': audio_id, - 'url': 'http://media.rozhlas.cz/_audio/%s.mp3' % audio_id, + 'url': f'http://media.rozhlas.cz/_audio/{audio_id}.mp3', 'title': title, 'description': description, 'duration': duration, @@ -110,7 +110,7 @@ class RozhlasVltavaIE(RozhlasBaseIE): 'artist': 'Aleš Stuchlý', 'channel_id': 'radio-wave', }, - }] + }], }, { 'url': 
'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744', 'info_dict': { @@ -183,7 +183,7 @@ class RozhlasVltavaIE(RozhlasBaseIE): 'chapter': 'Neklid #5', 'chapter_number': 5, }, - }] + }], }, { 'url': 'https://dvojka.rozhlas.cz/karel-siktanc-cerny-jezdec-bily-kun-napinava-pohadka-o-tajemnem-prizraku-8946969', 'info_dict': { @@ -220,7 +220,7 @@ class RozhlasVltavaIE(RozhlasBaseIE): 'duration': ('duration', {int_or_none}), 'artist': ('meta', 'ga', 'contentAuthor'), 'channel_id': ('meta', 'ga', 'contentCreator'), - }) + }), } def _real_extract(self, url): @@ -321,7 +321,7 @@ class MujRozhlasIE(RozhlasBaseIE): 'timestamp': ('attributes', 'since', {unified_timestamp}), 'modified_timestamp': ('attributes', 'updated', {unified_timestamp}), 'thumbnail': ('attributes', 'asset', 'url', {url_or_none}), - }) + }), } def _entries(self, api_url, playlist_id): diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py index 729804d..42ee9d2 100644 --- a/yt_dlp/extractor/rte.py +++ b/yt_dlp/extractor/rte.py @@ -35,7 +35,7 @@ class RteBaseIE(InfoExtractor): error_info = self._parse_json(ee.cause.response.read().decode(), item_id, fatal=False) if error_info: raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error_info['message']), + '{} said: {}'.format(self.IE_NAME, error_info['message']), expected=True) raise diff --git a/yt_dlp/extractor/rtl2.py b/yt_dlp/extractor/rtl2.py index 07e1aa3..86e383a 100644 --- a/yt_dlp/extractor/rtl2.py +++ b/yt_dlp/extractor/rtl2.py @@ -13,7 +13,7 @@ class RTL2IE(InfoExtractor): 'id': 'folge-203-0', 'ext': 'f4v', 'title': 'GRIP sucht den Sommerkönig', - 'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f' + 'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f', }, 'params': { # rtmp download @@ -26,7 +26,7 @@ class RTL2IE(InfoExtractor): 'id': 'anna-erwischt-alex', 'ext': 'mp4', 'title': 'Anna erwischt Alex!', - 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. 
Und tatsächlich erwischt sie ihn auf frischer Tat.' + 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.', }, 'params': { # rtmp download diff --git a/yt_dlp/extractor/rtlnl.py b/yt_dlp/extractor/rtlnl.py index 724cb64..4537b3d 100644 --- a/yt_dlp/extractor/rtlnl.py +++ b/yt_dlp/extractor/rtlnl.py @@ -57,7 +57,7 @@ class RtlNlIE(InfoExtractor): 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$', 'upload_date': '20150215', 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.', - } + }, }, { # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275) # best format available nettv @@ -99,14 +99,14 @@ class RtlNlIE(InfoExtractor): def _real_extract(self, url): uuid = self._match_id(url) info = self._download_json( - 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid, + f'http://www.rtl.nl/system/s4m/vfd/version=2/uuid={uuid}/fmt=adaptive/', uuid) material = info['material'][0] title = info['abstracts'][0]['name'] subtitle = material.get('title') if subtitle: - title += ' - %s' % subtitle + title += f' - {subtitle}' description = material.get('synopsis') meta = info.get('meta', {}) @@ -129,7 +129,7 @@ class RtlNlIE(InfoExtractor): r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)), 'height': int_or_none(self._search_regex( r'/sz=[0-9]+x([0-9]+)', - meta[p], 'thumbnail height', fatal=False)) + meta[p], 'thumbnail height', fatal=False)), }) return { @@ -196,7 +196,7 @@ class RTLLuTeleVODIE(RTLLuBaseIE): 'ext': 'mp4', 'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg', 'description': 'md5:b1db974408cc858c9fd241812e4a2a14', - } + }, }, { 
'url': 'https://www.rtl.lu/video/3295215', 'info_dict': { @@ -205,7 +205,7 @@ class RTLLuTeleVODIE(RTLLuBaseIE): 'ext': 'mp4', 'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg', 'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b', - } + }, }] @@ -221,7 +221,7 @@ class RTLLuArticleIE(RTLLuBaseIE): 'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg', 'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7', 'title': 'md5:40aa85f135578fbd549d3c9370321f99', - } + }, }, { # 5minutes 'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html', @@ -231,7 +231,7 @@ class RTLLuArticleIE(RTLLuBaseIE): 'description': 'md5:ac031da0740e997a5cf4633173634fee', 'title': 'md5:87e17722ed21af0f24be3243f4ec0c46', 'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg', - } + }, }, { # today.lu 'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html', @@ -241,7 +241,7 @@ class RTLLuArticleIE(RTLLuBaseIE): 'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower', 'description': 'The witchy theme continues in the latest episode of Once Upon A Time...', 'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg', - } + }, }] @@ -256,7 +256,7 @@ class RTLLuLiveIE(RTLLuBaseIE): 'live_status': 'is_live', 'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}', 'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg', - } + }, }, { # Tele:live-2 'url': 'https://www.rtl.lu/tele/live-2', @@ -266,7 +266,7 @@ class RTLLuLiveIE(RTLLuBaseIE): 'live_status': 'is_live', 'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}', 'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg', - } + }, }, { # Radio:lauschteren 'url': 'https://www.rtl.lu/radio/lauschteren', @@ -276,7 +276,7 @@ class RTLLuLiveIE(RTLLuBaseIE): 
'live_status': 'is_live', 'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}', 'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg', - } + }, }] @@ -290,5 +290,5 @@ class RTLLuRadioIE(RTLLuBaseIE): 'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9', 'title': '5 vir 12 - Stau um Stau', 'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg', - } + }, }] diff --git a/yt_dlp/extractor/rtnews.py b/yt_dlp/extractor/rtnews.py index 6be9945..558511f 100644 --- a/yt_dlp/extractor/rtnews.py +++ b/yt_dlp/extractor/rtnews.py @@ -14,7 +14,7 @@ class RTNewsIE(InfoExtractor): 'id': '546301', 'title': 'Crowds gather to greet deported Djokovic as he returns to Serbia (VIDEO)', 'description': 'md5:1d5bfe1a988d81fd74227cfdf93d314d', - 'thumbnail': 'https://cdni.rt.com/files/2022.01/article/61e587a085f540102c3386c1.png' + 'thumbnail': 'https://cdni.rt.com/files/2022.01/article/61e587a085f540102c3386c1.png', }, }, { 'url': 'https://www.rt.com/shows/in-question/535980-plot-to-assassinate-julian-assange/', @@ -23,7 +23,7 @@ class RTNewsIE(InfoExtractor): 'id': '535980', 'title': 'The plot to assassinate Julian Assange', 'description': 'md5:55279ce5e4441dc1d16e2e4a730152cd', - 'thumbnail': 'https://cdni.rt.com/files/2021.09/article/615226f42030274e8879b53d.png' + 'thumbnail': 'https://cdni.rt.com/files/2021.09/article/615226f42030274e8879b53d.png', }, 'playlist': [{ 'info_dict': { @@ -31,7 +31,7 @@ class RTNewsIE(InfoExtractor): 'ext': 'mp4', 'title': '6152271d85f5400464496162', }, - }] + }], }] def _entries(self, webpage): @@ -46,12 +46,12 @@ class RTNewsIE(InfoExtractor): } def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) return { '_type': 'playlist', - 'id': id, + 'id': playlist_id, 'entries': self._entries(webpage), 'title': self._og_search_title(webpage), 'description': 
self._og_search_description(webpage), @@ -71,9 +71,9 @@ class RTDocumentryIE(InfoExtractor): 'description': 'md5:647c76984b7cb9a8b52a567e87448d88', 'thumbnail': 'https://cdni.rt.com/rtd-files/films/escobars-hitman/escobars-hitman_11.jpg', 'average_rating': 8.53, - 'duration': 3134.0 + 'duration': 3134.0, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://rtd.rt.com/shows/the-kalashnikova-show-military-secrets-anna-knishenko/iskander-tactical-system-natos-headache/', 'info_dict': { @@ -86,9 +86,9 @@ class RTDocumentryIE(InfoExtractor): 'duration': 274.0, 'timestamp': 1605726000, 'view_count': int, - 'upload_date': '20201118' + 'upload_date': '20201118', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://rtd.rt.com/series/i-am-hacked-trailer/introduction-to-safe-digital-life-ep2/', 'info_dict': { @@ -101,29 +101,29 @@ class RTDocumentryIE(InfoExtractor): 'duration': 1524.0, 'timestamp': 1636977600, 'view_count': int, - 'upload_date': '20211115' + 'upload_date': '20211115', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) ld_json = self._search_json_ld(webpage, None, fatal=False) if not ld_json: self.raise_no_formats('No video/audio found at the provided url.', expected=True) media_json = self._parse_json( self._search_regex(r'(?s)\'Med\'\s*:\s*\[\s*({.+})\s*\]\s*};', webpage, 'media info'), - id, transform_source=js_to_json) + video_id, transform_source=js_to_json) if 'title' not in ld_json and 'title' in media_json: ld_json['title'] = media_json['title'] formats = [{'url': src['file']} for src in media_json.get('sources') or [] if src.get('file')] return { - 'id': id, + 'id': video_id, 'thumbnail': media_json.get('image'), 'formats': formats, - **ld_json + 
**ld_json, } @@ -144,23 +144,23 @@ class RTDocumentryPlaylistIE(InfoExtractor): }, }] - def _entries(self, webpage, id): + def _entries(self, webpage, playlist_id): video_urls = set(re.findall(r'list-2__link\s*"\s*href="([^"]+)"', webpage)) for v_url in video_urls: - if id not in v_url: + if playlist_id not in v_url: continue yield self.url_result( - 'https://rtd.rt.com%s' % v_url, + f'https://rtd.rt.com{v_url}', ie=RTDocumentryIE.ie_key()) def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) return { '_type': 'playlist', - 'id': id, - 'entries': self._entries(webpage, id), + 'id': playlist_id, + 'entries': self._entries(webpage, playlist_id), } @@ -174,20 +174,20 @@ class RuptlyIE(InfoExtractor): 'ext': 'mp4', 'title': 'Japan: Double trouble! Tokyo zoo presents adorable panda twins | Video Ruptly', 'description': 'md5:85a8da5fdb31486f0562daf4360ce75a', - 'thumbnail': 'https://storage.ruptly.tv/thumbnails/20220112-020/i6JQKnTNpYuqaXsR/i6JQKnTNpYuqaXsR.jpg' + 'thumbnail': 'https://storage.ruptly.tv/thumbnails/20220112-020/i6JQKnTNpYuqaXsR/i6JQKnTNpYuqaXsR.jpg', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) m3u8_url = self._search_regex(r'preview_url"\s?:\s?"(https?://storage\.ruptly\.tv/video_projects/.+\.m3u8)"', webpage, 'm3u8 url', fatal=False) if not m3u8_url: self.raise_no_formats('No video/audio found at the provided url.', expected=True) - formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, id, ext='mp4') + formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4') return { - 'id': id, + 'id': video_id, 'formats': formats, 'subtitles': subs, 'title': 
self._og_search_title(webpage), diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py index ec78d0a..944e863 100644 --- a/yt_dlp/extractor/rtp.py +++ b/yt_dlp/extractor/rtp.py @@ -35,7 +35,7 @@ class RTPIE(InfoExtractor): data = self._RX_OBFUSCATION.sub( lambda m: json.dumps( base64.b64decode(urllib.parse.unquote( - ''.join(self._parse_json(m.group(1), video_id)) + ''.join(self._parse_json(m.group(1), video_id)), )).decode('iso-8859-1')), data) return js_to_json(data) diff --git a/yt_dlp/extractor/rtrfm.py b/yt_dlp/extractor/rtrfm.py index 7381d82..7421e42 100644 --- a/yt_dlp/extractor/rtrfm.py +++ b/yt_dlp/extractor/rtrfm.py @@ -56,8 +56,8 @@ class RTRFMIE(InfoExtractor): url = None self.raise_no_formats('Expired or no episode on this date', expected=True) return { - 'id': '%s-%s' % (show, date), - 'title': '%s %s' % (title, date), + 'id': f'{show}-{date}', + 'title': f'{title} {date}', 'series': title, 'url': url, 'release_date': date, diff --git a/yt_dlp/extractor/rts.py b/yt_dlp/extractor/rts.py index bce5cba..dc1e2d3 100644 --- a/yt_dlp/extractor/rts.py +++ b/yt_dlp/extractor/rts.py @@ -1,7 +1,6 @@ import re from .srgssr import SRGSSRIE -from ..compat import compat_str from ..utils import ( determine_ext, int_or_none, @@ -110,7 +109,7 @@ class RTSIE(SRGSSRIE): # XXX: Do not subclass from concrete IE { 'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html', 'only_matching': True, - } + }, ] def _real_extract(self, url): @@ -120,7 +119,7 @@ class RTSIE(SRGSSRIE): # XXX: Do not subclass from concrete IE def download_json(internal_id): return self._download_json( - 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id, + f'http://www.rts.ch/a/{internal_id}.html?f=json/article', display_id) all_info = download_json(media_id) @@ -149,7 +148,7 @@ class RTSIE(SRGSSRIE): # XXX: Do not subclass from concrete IE r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"', page) if videos: - entries = 
[self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos] + entries = [self.url_result(f'srgssr:{video_urn}', 'SRGSSR') for video_urn in videos] if entries: return self.playlist_result(entries, media_id, all_info.get('title')) @@ -196,7 +195,7 @@ class RTSIE(SRGSSRIE): # XXX: Do not subclass from concrete IE 'tbr': extract_bitrate(format_url), }) - download_base = 'http://rtsww%s-d.rts.ch/' % ('-a' if media_type == 'audio' else '') + download_base = 'http://rtsww{}-d.rts.ch/'.format('-a' if media_type == 'audio' else '') for media in info.get('media', []): media_url = media.get('url') if not media_url or re.match(r'https?://', media_url): @@ -215,7 +214,7 @@ class RTSIE(SRGSSRIE): # XXX: Do not subclass from concrete IE self._check_formats(formats, media_id) duration = info.get('duration') or info.get('cutout') or info.get('cutduration') - if isinstance(duration, compat_str): + if isinstance(duration, str): duration = parse_duration(duration) return { diff --git a/yt_dlp/extractor/rtvcplay.py b/yt_dlp/extractor/rtvcplay.py index e7dcd5f..5b0eee9 100644 --- a/yt_dlp/extractor/rtvcplay.py +++ b/yt_dlp/extractor/rtvcplay.py @@ -213,7 +213,7 @@ class RTVCPlayEmbedIE(RTVCPlayBaseIE): 'title': 'Tráiler: Señoritas', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'ext': 'mp4', - } + }, }] def _real_extract(self, url): @@ -235,7 +235,7 @@ class RTVCPlayEmbedIE(RTVCPlayBaseIE): 'title': 'title', 'description': 'description', 'thumbnail': ('image', ..., 'thumbnail', 'path'), - }, get_all=False) + }, get_all=False), } @@ -282,5 +282,5 @@ class RTVCKalturaIE(RTVCPlayBaseIE): 'title': 'title', 'description': 'description', 'thumbnail': ('channel', 'image', 'logo', 'path'), - }) + }), } diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py index a99a266..7e0b666 100644 --- a/yt_dlp/extractor/rtve.py +++ b/yt_dlp/extractor/rtve.py @@ -3,7 +3,6 @@ import io import struct from .common import InfoExtractor -from ..compat import compat_b64decode from 
..utils import ( ExtractorError, determine_ext, @@ -62,14 +61,14 @@ class RTVEALaCartaIE(InfoExtractor): }] def _real_initialize(self): - user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode('utf-8')).decode('utf-8') + user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode()).decode('utf-8') self._manager = self._download_json( 'http://www.rtve.es/odin/loki/' + user_agent_b64, None, 'Fetching manager info')['manager'] @staticmethod def _decrypt_url(png): - encrypted_data = io.BytesIO(compat_b64decode(png)[8:]) + encrypted_data = io.BytesIO(base64.b64decode(png)[8:]) while True: length = struct.unpack('!I', encrypted_data.read(4))[0] chunk_type = encrypted_data.read(4) @@ -111,7 +110,7 @@ class RTVEALaCartaIE(InfoExtractor): def _extract_png_formats(self, video_id): png = self._download_webpage( - 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id), + f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png', video_id, 'Downloading url information', query={'q': 'v2'}) q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL']) formats = [] @@ -135,7 +134,7 @@ class RTVEALaCartaIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) info = self._download_json( - 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, + f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json', video_id)['page']['items'][0] if info['state'] == 'DESPU': raise ExtractorError('The video is no longer available', expected=True) @@ -194,7 +193,7 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE 'title': 'Ignatius Farray', 'thumbnail': r're:https?://.+/1613243011863.jpg', 'duration': 3559.559, - 'series': 'En Radio 3' + 'series': 'En Radio 3', }, }, { 'url': 
'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/', @@ -205,7 +204,7 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE 'title': 'Capítulo 26 y último: La muerte de Victor', 'thumbnail': r're:https?://.+/1632147445707.jpg', 'duration': 3174.086, - 'series': 'Frankenstein o el moderno Prometeo' + 'series': 'Frankenstein o el moderno Prometeo', }, }] @@ -217,8 +216,7 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE media url """ png = self._download_webpage( - 'http://www.rtve.es/ztnr/movil/thumbnail/%s/audios/%s.png' % - (self._manager, audio_id), + f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png', audio_id, 'Downloading url information', query={'q': 'v2'}) q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL']) formats = [] @@ -242,7 +240,7 @@ class RTVEAudioIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE def _real_extract(self, url): audio_id = self._match_id(url) info = self._download_json( - 'https://www.rtve.es/api/audios/%s.json' % audio_id, + f'https://www.rtve.es/api/audios/{audio_id}.json', audio_id)['page']['items'][0] return { @@ -288,7 +286,7 @@ class RTVELiveIE(RTVEALaCartaIE): # XXX: Do not subclass from concrete IE }, 'params': { 'skip_download': 'live stream', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py index defb8d7..927da57 100644 --- a/yt_dlp/extractor/rtvs.py +++ b/yt_dlp/extractor/rtvs.py @@ -21,7 +21,7 @@ class RTVSIE(InfoExtractor): 'duration': 2854, 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0000/b1R8.rtvs.jpg', 'display_id': '135331', - } + }, }, { # tv archive 'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118', @@ -34,7 +34,7 @@ class RTVSIE(InfoExtractor): 'timestamp': 1428555900, 'upload_date': '20150409', 'duration': 4986, - } + }, }, { # tv archive 'url': 
'https://www.rtvs.sk/televizia/archiv/18083?utm_source=web&utm_medium=rozcestnik&utm_campaign=Robin', @@ -48,7 +48,7 @@ class RTVSIE(InfoExtractor): 'duration': 831, 'upload_date': '20211111', 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0916/robin.jpg', - } + }, }] def _real_extract(self, url): @@ -80,5 +80,5 @@ class RTVSIE(InfoExtractor): 'duration': parse_duration(traverse_obj(data, ('playlist', 0, 'length'))), 'thumbnail': traverse_obj(data, ('playlist', 0, 'image')), 'timestamp': unified_timestamp(traverse_obj(data, ('playlist', 0, 'datetime_create'))), - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/rtvslo.py b/yt_dlp/extractor/rtvslo.py index 39ace7c..9c2e6fb 100644 --- a/yt_dlp/extractor/rtvslo.py +++ b/yt_dlp/extractor/rtvslo.py @@ -1,3 +1,5 @@ +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -6,6 +8,7 @@ from ..utils import ( traverse_obj, unified_timestamp, url_or_none, + urljoin, ) @@ -21,75 +24,73 @@ class RTVSLOIE(InfoExtractor): _API_BASE = 'https://api.rtvslo.si/ava/{}/{}?client_id=82013fb3a531d5414f478747c1aca622' SUB_LANGS_MAP = {'Slovenski': 'sl'} - _TESTS = [ - { - 'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv', - 'info_dict': { - 'id': '174842550', - 'ext': 'mp4', - 'release_timestamp': 1643140032, - 'upload_date': '20220125', - 'series': 'Dnevnik', - 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/92/dnevnik_3_wide2.jpg', - 'description': 'md5:76a18692757aeb8f0f51221106277dd2', - 'timestamp': 1643137046, - 'title': 'Dnevnik', - 'series_id': '92', - 'release_date': '20220125', - 'duration': 1789, - }, - }, { - 'url': 'https://365.rtvslo.si/arhiv/utrip/174843754', - 'info_dict': { - 'id': '174843754', - 'ext': 'mp4', - 'series_id': '94', - 'release_date': '20220129', - 'timestamp': 1643484455, - 'title': 'Utrip', - 'duration': 813, - 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/94/utrip_1_wide2.jpg', - 'description': 
'md5:77f2892630c7b17bb7a5bb84319020c9', - 'release_timestamp': 1643485825, - 'upload_date': '20220129', - 'series': 'Utrip', - }, - }, { - 'url': 'https://365.rtvslo.si/arhiv/il-giornale-della-sera/174844609', - 'info_dict': { - 'id': '174844609', - 'ext': 'mp3', - 'series_id': '106615841', - 'title': 'Il giornale della sera', - 'duration': 1328, - 'series': 'Il giornale della sera', - 'timestamp': 1643743800, - 'release_timestamp': 1643745424, - 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/il-giornale-della-sera_wide2.jpg', - 'upload_date': '20220201', - 'tbr': 128000, - 'release_date': '20220201', - }, - }, { - 'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750', - 'info_dict': { - 'id': '148350750', - 'ext': 'mp4', - 'title': 'Prvi šolski dan, mozaična oddaja za mlade', - 'series': 'Razred zase', - 'series_id': '148185730', - 'duration': 1481, - 'upload_date': '20121019', - 'timestamp': 1350672122, - 'release_date': '20121019', - 'release_timestamp': 1350672122, - 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg', - }, - }, { - 'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550', - 'only_matching': True - } - ] + _TESTS = [{ + 'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv', + 'info_dict': { + 'id': '174842550', + 'ext': 'mp4', + 'release_timestamp': 1643140032, + 'upload_date': '20220125', + 'series': 'Dnevnik', + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/92/dnevnik_3_wide2.jpg', + 'description': 'md5:76a18692757aeb8f0f51221106277dd2', + 'timestamp': 1643137046, + 'title': 'Dnevnik', + 'series_id': '92', + 'release_date': '20220125', + 'duration': 1789, + }, + }, { + 'url': 'https://365.rtvslo.si/arhiv/utrip/174843754', + 'info_dict': { + 'id': '174843754', + 'ext': 'mp4', + 'series_id': '94', + 'release_date': '20220129', + 'timestamp': 1643484455, + 'title': 'Utrip', + 'duration': 813, + 'thumbnail': 
'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/94/utrip_1_wide2.jpg', + 'description': 'md5:77f2892630c7b17bb7a5bb84319020c9', + 'release_timestamp': 1643485825, + 'upload_date': '20220129', + 'series': 'Utrip', + }, + }, { + 'url': 'https://365.rtvslo.si/arhiv/il-giornale-della-sera/174844609', + 'info_dict': { + 'id': '174844609', + 'ext': 'mp3', + 'series_id': '106615841', + 'title': 'Il giornale della sera', + 'duration': 1328, + 'series': 'Il giornale della sera', + 'timestamp': 1643743800, + 'release_timestamp': 1643745424, + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/il-giornale-della-sera_wide2.jpg', + 'upload_date': '20220201', + 'tbr': 128000, + 'release_date': '20220201', + }, + }, { + 'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750', + 'info_dict': { + 'id': '148350750', + 'ext': 'mp4', + 'title': 'Prvi šolski dan, mozaična oddaja za mlade', + 'series': 'Razred zase', + 'series_id': '148185730', + 'duration': 1481, + 'upload_date': '20121019', + 'timestamp': 1350672122, + 'release_date': '20121019', + 'release_timestamp': 1350672122, + 'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg', + }, + }, { + 'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550', + 'only_matching': True, + }] def _real_extract(self, url): v_id = self._match_id(url) @@ -127,7 +128,7 @@ class RTVSLOIE(InfoExtractor): 'format_note': 'Sign language interpretation', 'preference': -10, 'language': ( 'slv' if f.get('language') == 'eng' and f.get('acodec') != 'none' - else f.get('language')) + else f.get('language')), }) for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['https']))): @@ -164,3 +165,26 @@ class RTVSLOIE(InfoExtractor): 'series': meta.get('showName'), 'series_id': meta.get('showId'), } + + +class RTVSLOShowIE(InfoExtractor): + IE_NAME = 'rtvslo.si:show' + _VALID_URL = r'https?://(?:365|4d)\.rtvslo.si/oddaja/[^/?#&]+/(?P<id>\d+)' + + _TESTS = 
[{ + 'url': 'https://365.rtvslo.si/oddaja/ekipa-bled/173250997', + 'info_dict': { + 'id': '173250997', + 'title': 'Ekipa Bled', + }, + 'playlist_count': 18, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + return self.playlist_from_matches( + re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage), + playlist_id, self._html_extract_title(webpage), + getter=lambda x: urljoin('https://365.rtvslo.si', x), ie=RTVSLOIE) diff --git a/yt_dlp/extractor/rule34video.py b/yt_dlp/extractor/rule34video.py index 11095b2..3630f5e 100644 --- a/yt_dlp/extractor/rule34video.py +++ b/yt_dlp/extractor/rule34video.py @@ -38,8 +38,8 @@ class Rule34VideoIE(InfoExtractor): 'uploader': 'Sweet HMV', 'uploader_url': 'https://rule34video.com/members/22119/', 'categories': ['3D', 'MMD', 'iwara'], - 'tags': 'mincount:10' - } + 'tags': 'mincount:10', + }, }, { 'url': 'https://rule34video.com/videos/3065296/lara-in-trouble-ep-7-wildeerstudio/', @@ -61,8 +61,8 @@ class Rule34VideoIE(InfoExtractor): 'uploader': 'CerZule', 'uploader_url': 'https://rule34video.com/members/36281/', 'categories': ['3D', 'Tomb Raider'], - 'tags': 'mincount:40' - } + 'tags': 'mincount:40', + }, }, ] diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index 837a324..db780a2 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -37,7 +37,7 @@ class RumbleEmbedIE(InfoExtractor): 'duration': 234, 'uploader': 'WMAR', 'live_status': 'not_live', - } + }, }, { 'url': 'https://rumble.com/embed/vslb7v', 'md5': '7418035de1a30a178b8af34dc2b6a52b', @@ -53,7 +53,7 @@ class RumbleEmbedIE(InfoExtractor): 'duration': 901, 'uploader': 'CTNews', 'live_status': 'not_live', - } + }, }, { 'url': 'https://rumble.com/embed/vunh1h', 'info_dict': { @@ -73,12 +73,12 @@ class RumbleEmbedIE(InfoExtractor): { 'url': r're:https://.+\.vtt', 'name': 'English', - 'ext': 'vtt' - } - ] + 'ext': 'vtt', + }, + ], }, }, - 'params': 
{'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://rumble.com/embed/v1essrt', 'info_dict': { @@ -93,7 +93,7 @@ class RumbleEmbedIE(InfoExtractor): 'uploader': 'Lofi Girl', 'live_status': 'is_live', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://rumble.com/embed/v1amumr', 'info_dict': { @@ -110,7 +110,7 @@ class RumbleEmbedIE(InfoExtractor): 'uploader': 'Rumble Events', 'live_status': 'was_live', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://rumble.com/embed/ufe9n.v5pv5f', 'only_matching': True, @@ -133,7 +133,7 @@ class RumbleEmbedIE(InfoExtractor): 'uploader': 'Mr Producer Media', 'upload_date': '20220610', 'live_status': 'not_live', - } + }, }, ] @@ -198,7 +198,7 @@ class RumbleEmbedIE(InfoExtractor): 'filesize': 'size', 'width': 'w', 'height': 'h', - }, expected_type=lambda x: int(x) or None) + }, expected_type=lambda x: int(x) or None), }) subtitles = { @@ -258,7 +258,7 @@ class RumbleIE(InfoExtractor): 'dislike_count': int, 'view_count': int, 'live_status': 'not_live', - } + }, }, { 'url': 'http://www.rumble.com/vDMUM1?key=value', 'only_matching': True, @@ -353,7 +353,7 @@ class RumbleIE(InfoExtractor): r'<span data-js="rumbles_up_votes">\s*([\d,.KM]+)', webpage, 'like count', default=None)), 'dislike_count': parse_count(self._search_regex( r'<span data-js="rumbles_down_votes">\s*([\d,.KM]+)', webpage, 'dislike count', default=None)), - 'description': clean_html(get_element_by_class('media-description', webpage)) + 'description': clean_html(get_element_by_class('media-description', webpage)), } @@ -377,7 +377,7 @@ class RumbleChannelIE(InfoExtractor): def entries(self, url, playlist_id): for page in itertools.count(1): try: - webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note='Downloading page %d' % page) + webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note=f'Downloading page {page}') 
except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 404: break diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py index eb12f32..d389b32 100644 --- a/yt_dlp/extractor/rutube.py +++ b/yt_dlp/extractor/rutube.py @@ -1,9 +1,6 @@ import itertools from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( bool_or_none, determine_ext, @@ -21,7 +18,7 @@ class RutubeBaseIE(InfoExtractor): query = {} query['format'] = 'json' return self._download_json( - 'http://rutube.ru/api/video/%s/' % video_id, + f'http://rutube.ru/api/video/{video_id}/', video_id, 'Downloading video JSON', 'Unable to download video JSON', query=query) @@ -44,7 +41,7 @@ class RutubeBaseIE(InfoExtractor): 'thumbnail': video.get('thumbnail_url'), 'duration': duration, 'uploader': try_get(video, lambda x: x['author']['name']), - 'uploader_id': compat_str(uploader_id) if uploader_id else None, + 'uploader_id': str(uploader_id) if uploader_id else None, 'timestamp': unified_timestamp(video.get('created_ts')), 'categories': [category] if category else None, 'age_limit': age_limit, @@ -63,7 +60,7 @@ class RutubeBaseIE(InfoExtractor): query = {} query['format'] = 'json' return self._download_json( - 'http://rutube.ru/api/play/options/%s/' % video_id, + f'http://rutube.ru/api/play/options/{video_id}/', video_id, 'Downloading options JSON', 'Unable to download options JSON', headers=self.geo_verification_headers(), query=query) @@ -171,7 +168,7 @@ class RutubeIE(RutubeBaseIE): @classmethod def suitable(cls, url): - return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url) + return False if RutubePlaylistIE.suitable(url) else super().suitable(url) def _real_extract(self, url): video_id = self._match_id(url) @@ -236,7 +233,7 @@ class RutubePlaylistBaseIE(RutubeBaseIE): page = self._download_json( next_page_url or self._next_page_url( pagenum, playlist_id, *args, **kwargs), - playlist_id, 'Downloading 
page %s' % pagenum) + playlist_id, f'Downloading page {pagenum}') results = page.get('results') if not results or not isinstance(results, list): @@ -335,7 +332,7 @@ class RutubePlaylistIE(RutubePlaylistBaseIE): def suitable(cls, url): from ..utils import int_or_none, parse_qs - if not super(RutubePlaylistIE, cls).suitable(url): + if not super().suitable(url): return False params = parse_qs(url) return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py index 726d491..9bd26ad 100644 --- a/yt_dlp/extractor/rutv.py +++ b/yt_dlp/extractor/rutv.py @@ -124,18 +124,18 @@ class RUTVIE(InfoExtractor): is_live = video_type == 'live' json_data = self._download_json( - 'http://player.vgtrk.com/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id), + 'http://player.vgtrk.com/iframe/data{}/id/{}'.format('live' if is_live else 'video', video_id), video_id, 'Downloading JSON') if json_data['errors']: - raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, json_data['errors']), expected=True) playlist = json_data['data']['playlist'] medialist = playlist['medialist'] media = medialist[0] if media['errors']: - raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, media['errors']), expected=True) view_count = int_or_none(playlist.get('count_views')) priority_transport = playlist['priority_transport'] @@ -175,12 +175,12 @@ class RUTVIE(InfoExtractor): continue else: fmt = { - 'url': url + 'url': url, } fmt.update({ 'width': int_or_none(quality, default=height, invscale=width, scale=height), 'height': int_or_none(quality, default=height), - 'format_id': '%s-%s' % (transport, quality), + 'format_id': f'{transport}-{quality}', 'source_preference': preference, }) formats.append(fmt) diff --git 
a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py index dc61387..2dddb39 100644 --- a/yt_dlp/extractor/ruutu.py +++ b/yt_dlp/extractor/ruutu.py @@ -1,8 +1,8 @@ import json import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse from ..utils import ( ExtractorError, determine_ext, @@ -164,7 +164,7 @@ class RuutuIE(InfoExtractor): video_id = self._match_id(url) video_xml = self._download_xml( - '%s/media-xml-cache' % self._API_BASE, video_id, + f'{self._API_BASE}/media-xml-cache', video_id, query={'id': video_id}) formats = [] @@ -182,8 +182,8 @@ class RuutuIE(InfoExtractor): processed_urls.append(video_url) ext = determine_ext(video_url) auth_video_url = url_or_none(self._download_webpage( - '%s/auth/access/v2' % self._API_BASE, video_id, - note='Downloading authenticated %s stream URL' % ext, + f'{self._API_BASE}/auth/access/v2', video_id, + note=f'Downloading authenticated {ext} stream URL', fatal=False, query={'stream': video_url})) if auth_video_url: processed_urls.append(auth_video_url) @@ -209,16 +209,16 @@ class RuutuIE(InfoExtractor): 'vcodec': 'none', }) else: - proto = compat_urllib_parse_urlparse(video_url).scheme + proto = urllib.parse.urlparse(video_url).scheme if not child.tag.startswith('HTTP') and proto != 'rtmp': continue preference = -1 if proto == 'rtmp' else 1 label = child.get('label') tbr = int_or_none(child.get('bitrate')) - format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto + format_id = f'{proto}-{label if label else tbr}' if label or tbr else proto if not self._is_valid_url(video_url, video_id, format_id): continue - width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]] + width, height = (int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]) formats.append({ 'format_id': format_id, 'url': video_url, @@ -242,7 +242,7 @@ class RuutuIE(InfoExtractor): self.report_drm(video_id) ns_st_cds = 
pv('ns_st_cds') if ns_st_cds != 'free': - raise ExtractorError('This video is %s.' % ns_st_cds, expected=True) + raise ExtractorError(f'This video is {ns_st_cds}.', expected=True) themes = pv('themes') diff --git a/yt_dlp/extractor/ruv.py b/yt_dlp/extractor/ruv.py index 12499d6..5e55a40 100644 --- a/yt_dlp/extractor/ruv.py +++ b/yt_dlp/extractor/ruv.py @@ -131,10 +131,10 @@ class RuvSpilaIE(InfoExtractor): }, }, { 'url': 'https://www.ruv.is/ungruv/spila/ungruv/28046/8beuph', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.ruv.is/krakkaruv/spila/krakkafrettir/30712/9jbgb0', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -153,7 +153,7 @@ class RuvSpilaIE(InfoExtractor): } } } - }''' % (series_id, display_id)})['data']['Program'] + }''' % (series_id, display_id)})['data']['Program'] # noqa: UP031 episode = program['episodes'][0] subs = {} @@ -182,5 +182,5 @@ class RuvSpilaIE(InfoExtractor): 'timestamp': unified_timestamp(episode.get('firstrun')), 'formats': formats, 'age_limit': episode.get('rating'), - 'chapters': clips + 'chapters': clips, } diff --git a/yt_dlp/extractor/s4c.py b/yt_dlp/extractor/s4c.py index 67eff72..6eb8b2b 100644 --- a/yt_dlp/extractor/s4c.py +++ b/yt_dlp/extractor/s4c.py @@ -12,7 +12,7 @@ class S4CIE(InfoExtractor): 'title': 'Y Swn', 'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0', 'duration': 5340, - 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg' + 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg', }, }, { 'url': 'https://www.s4c.cymru/clic/programme/856636948', @@ -22,7 +22,7 @@ class S4CIE(InfoExtractor): 'title': 'Am Dro', 'duration': 2880, 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe', - 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg' + 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg', }, }] @@ -94,7 +94,7 @@ class S4CSeriesIE(InfoExtractor): 
'https://www.s4c.cymru/df/series_details', series_id, query={ 'lang': 'e', 'series_id': series_id, - 'show_prog_in_series': 'Y' + 'show_prog_in_series': 'Y', }, note='Downloading series details JSON') return self.playlist_result( diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py index 17dff0a..86f34df 100644 --- a/yt_dlp/extractor/safari.py +++ b/yt_dlp/extractor/safari.py @@ -1,11 +1,8 @@ import json import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) from ..utils import ( ExtractorError, update_url_query, @@ -34,9 +31,9 @@ class SafariBaseIE(InfoExtractor): return redirect_url = urlh.url - parsed_url = compat_urlparse.urlparse(redirect_url) - qs = compat_parse_qs(parsed_url.query) - next_uri = compat_urlparse.urljoin( + parsed_url = urllib.parse.urlparse(redirect_url) + qs = urllib.parse.parse_qs(parsed_url.query) + next_uri = urllib.parse.urljoin( 'https://api.oreilly.com', qs['next'][0]) auth, urlh = self._download_json_handle( @@ -54,7 +51,7 @@ class SafariBaseIE(InfoExtractor): if (not auth.get('logged_in') and not auth.get('redirect_uri') and credentials): raise ExtractorError( - 'Unable to login: %s' % credentials, expected=True) + f'Unable to login: {credentials}', expected=True) # oreilly serves two same instances of the following cookies # in Set-Cookie header and expects first one to be actually set @@ -62,7 +59,7 @@ class SafariBaseIE(InfoExtractor): self._apply_first_set_cookie_header(urlh, cookie) _, urlh = self._download_webpage_handle( - auth.get('redirect_uri') or next_uri, None, 'Completing login',) + auth.get('redirect_uri') or next_uri, None, 'Completing login') if is_logged(urlh): self.LOGGED_IN = True @@ -124,7 +121,7 @@ class SafariIE(SafariBaseIE): partner_id = self._PARTNER_ID ui_id = self._UICONF_ID else: - video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part')) + video_id = '{}-{}'.format(mobj.group('course_id'), 
mobj.group('part')) webpage, urlh = self._download_webpage_handle(url, video_id) @@ -144,14 +141,14 @@ class SafariIE(SafariBaseIE): group='id') query = { - 'wid': '_%s' % partner_id, + 'wid': f'_{partner_id}', 'uiconf_id': ui_id, 'flashvars[referenceId]': reference_id, } if self.LOGGED_IN: kaltura_session = self._download_json( - '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), + f'{self._API_BASE}/player/kaltura_session/?reference_id={reference_id}', video_id, 'Downloading kaltura session JSON', 'Unable to download kaltura session JSON', fatal=False, headers={'Accept': 'application/json'}) @@ -180,7 +177,7 @@ class SafariApiIE(SafariBaseIE): def _real_extract(self, url): mobj = self._match_valid_url(url) part = self._download_json( - url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), + url, '{}/{}'.format(mobj.group('course_id'), mobj.group('part')), 'Downloading part JSON') web_url = part['web_url'] if 'library/view' in web_url: @@ -236,18 +233,18 @@ class SafariCourseIE(SafariBaseIE): @classmethod def suitable(cls, url): return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url) - else super(SafariCourseIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): course_id = self._match_id(url) course_json = self._download_json( - '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), + f'{self._API_BASE}/book/{course_id}/?override_format={self._API_FORMAT}', course_id, 'Downloading course JSON') if 'chapters' not in course_json: raise ExtractorError( - 'No chapters found for course %s' % course_id, expected=True) + f'No chapters found for course {course_id}', expected=True) entries = [ self.url_result(chapter, SafariApiIE.ie_key()) diff --git a/yt_dlp/extractor/saitosan.py b/yt_dlp/extractor/saitosan.py index a5f05e1..4ed9195 100644 --- a/yt_dlp/extractor/saitosan.py +++ b/yt_dlp/extractor/saitosan.py @@ -47,15 +47,15 @@ class SaitosanIE(InfoExtractor): base += 
'&sid=' + sid self._download_webpage(base, b_id, note='Polling socket') - payload = '420["room_start_join",{"room_id":"%s"}]' % b_id - payload = '%s:%s' % (len(payload), payload) + payload = f'420["room_start_join",{{"room_id":"{b_id}"}}]' + payload = f'{len(payload)}:{payload}' self._download_webpage(base, b_id, data=payload, note='Polling socket with payload') response = self._download_socket_json(base, b_id, note='Polling socket') if not response.get('ok'): err = response.get('error') or {} raise ExtractorError( - '%s said: %s - %s' % (self.IE_NAME, err.get('code', '?'), err.get('msg', 'Unknown')) if err + '{} said: {} - {}'.format(self.IE_NAME, err.get('code', '?'), err.get('msg', 'Unknown')) if err else 'The socket reported that the broadcast could not be joined. Maybe it\'s offline or the URL is incorrect', expected=True, video_id=b_id) @@ -71,5 +71,5 @@ class SaitosanIE(InfoExtractor): 'formats': self._extract_m3u8_formats(m3u8_url, b_id, 'mp4', live=True), 'thumbnail': m3u8_url.replace('av.m3u8', 'thumb'), 'uploader': try_get(b_data, lambda x: x['broadcast_user']['name']), # same as title - 'is_live': True + 'is_live': True, } diff --git a/yt_dlp/extractor/samplefocus.py b/yt_dlp/extractor/samplefocus.py index e9f5c22..36ceb02 100644 --- a/yt_dlp/extractor/samplefocus.py +++ b/yt_dlp/extractor/samplefocus.py @@ -28,10 +28,10 @@ class SampleFocusIE(InfoExtractor): }, }, { 'url': 'https://samplefocus.com/samples/dababy-style-bass-808', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://samplefocus.com/samples/young-chop-kick', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -76,7 +76,7 @@ class SampleFocusIE(InfoExtractor): def extract_count(klass): return int_or_none(self._html_search_regex( - r'<span[^>]+class=(?:["\'])?%s-count[^>]*>(\d+)' % klass, + rf'<span[^>]+class=(?:["\'])?{klass}-count[^>]*>(\d+)', webpage, klass, fatal=False)) return { @@ -90,7 +90,7 @@ class SampleFocusIE(InfoExtractor): 
r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<', webpage, 'license', fatal=False, group='license'), 'uploader_id': uploader_id, - 'like_count': extract_count('sample-%s-favorites' % sample_id), + 'like_count': extract_count(f'sample-{sample_id}-favorites'), 'comment_count': extract_count('comments'), 'comments': comments, 'categories': categories, diff --git a/yt_dlp/extractor/sapo.py b/yt_dlp/extractor/sapo.py index beffaee..2b8c078 100644 --- a/yt_dlp/extractor/sapo.py +++ b/yt_dlp/extractor/sapo.py @@ -64,7 +64,7 @@ class SapoIE(InfoExtractor): video_id = mobj.group('id') item = self._download_xml( - 'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item') + f'http://rd3.videos.sapo.pt/{video_id}/rss2', video_id).find('./channel/item') title = item.find('./title').text description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text diff --git a/yt_dlp/extractor/sbscokr.py b/yt_dlp/extractor/sbscokr.py index 001d19e..93cf14f 100644 --- a/yt_dlp/extractor/sbscokr.py +++ b/yt_dlp/extractor/sbscokr.py @@ -94,7 +94,7 @@ class SBSCoKrIE(InfoExtractor): formats = [] for stream in traverse_obj(details, ( - 'vod', 'source', 'mediasourcelist', lambda _, v: v['mediaurl'] or v['mediarscuse'] + 'vod', 'source', 'mediasourcelist', lambda _, v: v['mediaurl'] or v['mediarscuse'], ), default=[source]): if not stream.get('mediaurl'): new_source = traverse_obj( @@ -108,7 +108,7 @@ class SBSCoKrIE(InfoExtractor): 'format_id': stream.get('mediarscuse'), 'format_note': stream.get('medianame'), **parse_resolution(stream.get('quality')), - 'preference': int_or_none(stream.get('mediarscuse')) + 'preference': int_or_none(stream.get('mediarscuse')), }) caption_url = traverse_obj(details, ('vod', 'source', 'subtitle', {url_or_none})) diff --git a/yt_dlp/extractor/screencast.py b/yt_dlp/extractor/screencast.py index df5e79b..44b5891 100644 --- a/yt_dlp/extractor/screencast.py +++ b/yt_dlp/extractor/screencast.py @@ -1,7 +1,6 @@ -import urllib.request 
+import urllib.parse from .common import InfoExtractor -from ..compat import compat_parse_qs from ..utils import ExtractorError @@ -16,7 +15,7 @@ class ScreencastIE(InfoExtractor): 'title': 'Color Measurement with Ocean Optics Spectrometers', 'description': 'md5:240369cde69d8bed61349a199c5fb153', 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', - } + }, }, { 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI', 'md5': 'e8e4b375a7660a9e7e35c33973410d34', @@ -26,7 +25,7 @@ class ScreencastIE(InfoExtractor): 'title': 'The Amadeus Spectrometer', 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit', 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', - } + }, }, { 'url': 'http://www.screencast.com/t/aAB3iowa', 'md5': 'dedb2734ed00c9755761ccaee88527cd', @@ -36,7 +35,7 @@ class ScreencastIE(InfoExtractor): 'title': 'Google Earth Export', 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', - } + }, }, { 'url': 'http://www.screencast.com/t/X3ddTrYh', 'md5': '669ee55ff9c51988b4ebc0877cc8b159', @@ -46,7 +45,7 @@ class ScreencastIE(InfoExtractor): 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression', 'description': 'md5:7b9f393bc92af02326a5c5889639eab0', 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', - } + }, }, { 'url': 'http://screencast.com/t/aAB3iowa', 'only_matching': True, @@ -71,8 +70,8 @@ class ScreencastIE(InfoExtractor): if flash_vars_s: flash_vars_s = flash_vars_s.replace(',', '&') if flash_vars_s: - flash_vars = compat_parse_qs(flash_vars_s) - video_url_raw = urllib.request.quote( + flash_vars = urllib.parse.parse_qs(flash_vars_s) + video_url_raw = urllib.parse.quote( flash_vars['content'][0]) video_url = video_url_raw.replace('http%3A', 'http:') diff --git a/yt_dlp/extractor/screencastomatic.py b/yt_dlp/extractor/screencastomatic.py index 28e25e9..3af9135 100644 --- 
a/yt_dlp/extractor/screencastomatic.py +++ b/yt_dlp/extractor/screencastomatic.py @@ -23,7 +23,7 @@ class ScreencastOMaticIE(InfoExtractor): 'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.', 'duration': 369, 'upload_date': '20141216', - } + }, }, { 'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl', 'only_matching': True, diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py index 85d51cd..d770368 100644 --- a/yt_dlp/extractor/scrippsnetworks.py +++ b/yt_dlp/extractor/scrippsnetworks.py @@ -56,10 +56,10 @@ class ScrippsNetworksWatchIE(AWSIE): site_id, video_id = mobj.group('site', 'id') aws_identity_id_json = json.dumps({ - 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION - }).encode('utf-8') + 'IdentityId': f'{self._AWS_REGION}:7655847c-0ae7-4d9b-80d6-56c062927eb3', + }).encode() token = self._download_json( - 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id, + f'https://cognito-identity.{self._AWS_REGION}.amazonaws.com/', video_id, data=aws_identity_id_json, headers={ 'Accept': '*/*', @@ -85,11 +85,11 @@ class ScrippsNetworksWatchIE(AWSIE): def get(key): return xpath_text( - sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key, + sts, f'.//{{https://sts.amazonaws.com/doc/2011-06-15/}}{key}', fatal=True) mcp_id = self._aws_execute_api({ - 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id), + 'uri': f'/1/web/brands/{self._SNI_TABLE[site_id]}/episodes/scrid/{video_id}', 'access_key': get('AccessKeyId'), 'secret_key': get('SecretAccessKey'), 'session_token': get('SessionToken'), @@ -97,7 +97,7 @@ class ScrippsNetworksWatchIE(AWSIE): return self.url_result( smuggle_url( - 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, + f'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:{mcp_id}', {'geo_countries': ['US']}), 
AnvatoIE.ie_key(), video_id=mcp_id) diff --git a/yt_dlp/extractor/scrolller.py b/yt_dlp/extractor/scrolller.py index 4f9fa14..a50822e 100644 --- a/yt_dlp/extractor/scrolller.py +++ b/yt_dlp/extractor/scrolller.py @@ -14,7 +14,7 @@ class ScrolllerIE(InfoExtractor): 'thumbnail': 'https://zepto.scrolller.com/a-helping-hand-3ty9q8x094-540x960.jpg', 'title': 'A helping hand', 'age_limit': 0, - } + }, }, { 'url': 'https://scrolller.com/tigers-chasing-a-drone-c5d1f2so6j', 'info_dict': { @@ -23,7 +23,7 @@ class ScrolllerIE(InfoExtractor): 'thumbnail': 'https://zepto.scrolller.com/tigers-chasing-a-drone-az9pkpguwe-540x303.jpg', 'title': 'Tigers chasing a drone', 'age_limit': 0, - } + }, }, { 'url': 'https://scrolller.com/baby-rhino-smells-something-9chhugsv9p', 'info_dict': { @@ -32,7 +32,7 @@ class ScrolllerIE(InfoExtractor): 'thumbnail': 'https://atto.scrolller.com/hmm-whats-that-smell-bh54mf2c52-300x224.jpg', 'title': 'Baby rhino smells something', 'age_limit': 0, - } + }, }, { 'url': 'https://scrolller.com/its-all-fun-and-games-cco8jjmoh7', 'info_dict': { @@ -41,7 +41,7 @@ class ScrolllerIE(InfoExtractor): 'thumbnail': 'https://atto.scrolller.com/its-all-fun-and-games-3amk9vg7m3-540x649.jpg', 'title': 'It\'s all fun and games...', 'age_limit': 0, - } + }, }, { 'url': 'https://scrolller.com/may-the-force-be-with-you-octokuro-yeytg1fs7a', 'info_dict': { @@ -50,7 +50,7 @@ class ScrolllerIE(InfoExtractor): 'thumbnail': 'https://thumbs2.redgifs.com/DarkStarchyNautilus-poster.jpg', 'title': 'May the force be with you (Octokuro)', 'age_limit': 18, - } + }, }] def _real_extract(self, url): @@ -68,7 +68,7 @@ class ScrolllerIE(InfoExtractor): height } } - }''' % video_id + }''' % video_id, # noqa: UP031 } video_data = self._download_json( @@ -98,5 +98,5 @@ class ScrolllerIE(InfoExtractor): 'title': video_data.get('title'), 'thumbnails': thumbnails, 'formats': formats, - 'age_limit': 18 if video_data.get('isNsfw') else 0 + 'age_limit': 18 if video_data.get('isNsfw') else 0, } diff 
--git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py index fc91d60..3971132 100644 --- a/yt_dlp/extractor/scte.py +++ b/yt_dlp/extractor/scte.py @@ -41,7 +41,7 @@ class SCTEBaseIE(InfoExtractor): r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</', response, 'error message', default=None) if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError(f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') @@ -66,9 +66,9 @@ class SCTEIE(SCTEBaseIE): title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title') context_id = self._search_regex(r'context-(\d+)', webpage, video_id) - content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id + content_base = f'https://learning.scte.org/pluginfile.php/{context_id}/mod_scorm/content/8/' context = decode_packed_codes(self._download_webpage( - '%smobile/data.js' % content_base, video_id)) + f'{content_base}mobile/data.js', video_id)) data = self._parse_xml( self._search_regex( diff --git a/yt_dlp/extractor/senategov.py b/yt_dlp/extractor/senategov.py index 7ff0cf5..cddca09 100644 --- a/yt_dlp/extractor/senategov.py +++ b/yt_dlp/extractor/senategov.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urlparse, -) from ..utils import ( ExtractorError, parse_qs, @@ -68,7 +65,7 @@ class SenateISVPIE(InfoExtractor): 'info_dict': { 'id': 'commerce011514', 'ext': 'mp4', - 'title': 'Integrated Senate Video Player' + 'title': 'Integrated Senate Video Player', }, 'params': { # m3u8 download @@ -80,8 +77,8 @@ class SenateISVPIE(InfoExtractor): 'info_dict': { 'id': 'intel090613', 'ext': 'mp4', - 'title': 'Integrated Senate Video Player' - } + 'title': 'Integrated Senate Video Player', + }, }, { # From http://www.c-span.org/video/?96791-1 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715', @@ -91,7 +88,7 @@ class 
SenateISVPIE(InfoExtractor): def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) - qs = compat_parse_qs(self._match_valid_url(url).group('qs')) + qs = urllib.parse.parse_qs(self._match_valid_url(url).group('qs')) if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): raise ExtractorError('Invalid URL', expected=True) @@ -114,13 +111,13 @@ class SenateISVPIE(InfoExtractor): formats = [] if video_type == 'arch': filename = video_id if '.' in video_id else video_id + '.mp4' - m3u8_url = compat_urlparse.urljoin(domain, 'i/' + filename + '/master.m3u8') + m3u8_url = urllib.parse.urljoin(domain, 'i/' + filename + '/master.m3u8') formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8') else: hdcore_sign = 'hdcore=3.1.0' url_params = (domain, video_id, stream_num) f4m_url = f'%s/z/%s_1@%s/manifest.f4m?{hdcore_sign}' % url_params - m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params + m3u8_url = '{}/i/{}_1@{}/master.m3u8'.format(*url_params) for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'): # URLs without the extra param induce an 404 error entry.update({'extra_param_to_segment_url': hdcore_sign}) @@ -196,5 +193,5 @@ class SenateGovIE(InfoExtractor): 'description': self._og_search_description(webpage, default=None), 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'age_limit': self._rta_search(webpage), - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py index 99fcf51..ecf4b27 100644 --- a/yt_dlp/extractor/sendtonews.py +++ b/yt_dlp/extractor/sendtonews.py @@ -19,7 +19,7 @@ class SendtoNewsIE(InfoExtractor): # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/ 'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES', 'info_dict': { - 'id': 'GxfCe0Zo7D-175909-5588' + 'id': 
'GxfCe0Zo7D-175909-5588', }, 'playlist_count': 8, # test the first video only to prevent lengthy tests @@ -75,7 +75,7 @@ class SendtoNewsIE(InfoExtractor): if not tbr: continue f.update({ - 'format_id': '%s-%d' % (determine_protocol(f), tbr), + 'format_id': f'{determine_protocol(f)}-{tbr}', 'tbr': tbr, }) @@ -98,7 +98,7 @@ class SendtoNewsIE(InfoExtractor): 'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '), # 'tbr' was explicitly set to be preferred over 'height' originally, # So this is being kept unless someone can confirm this is unnecessary - '_format_sort_fields': ('tbr', 'res') + '_format_sort_fields': ('tbr', 'res'), }) entries.append(info_dict) diff --git a/yt_dlp/extractor/servus.py b/yt_dlp/extractor/servus.py index dda1958..117f180 100644 --- a/yt_dlp/extractor/servus.py +++ b/yt_dlp/extractor/servus.py @@ -39,7 +39,7 @@ class ServusIE(InfoExtractor): 'episode': 'Episode 8 - Vie Ferrate – Klettersteige in den Alpen', 'episode_number': 8, }, - 'params': {'skip_download': 'm3u8'} + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.servustv.com/natur/v/aa-1xg5xwmgw2112/', 'only_matching': True, diff --git a/yt_dlp/extractor/sevenplus.py b/yt_dlp/extractor/sevenplus.py index 6c688d1..6e1fbe7 100644 --- a/yt_dlp/extractor/sevenplus.py +++ b/yt_dlp/extractor/sevenplus.py @@ -2,7 +2,6 @@ import json import re from .brightcove import BrightcoveNewBaseIE -from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -31,7 +30,7 @@ class SevenPlusIE(BrightcoveNewBaseIE): }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001', 'only_matching': True, @@ -71,7 +70,7 @@ class SevenPlusIE(BrightcoveNewBaseIE): 'idToken': id_token, 'platformId': 'web', 'regSource': '7plus', - }).encode('utf-8')) or {} + }).encode()) or {} self.token = token_resp.get('token') if not self.token: self.report_warning('Unable to log in: Could not extract 
auth token') @@ -120,7 +119,7 @@ class SevenPlusIE(BrightcoveNewBaseIE): if value: info[dst_key] = value info['series'] = try_get( - item, lambda x: x['seriesLogo']['name'], compat_str) + item, lambda x: x['seriesLogo']['name'], str) mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title']) if mobj: info.update({ diff --git a/yt_dlp/extractor/sexu.py b/yt_dlp/extractor/sexu.py index 989b63c..71b1076 100644 --- a/yt_dlp/extractor/sexu.py +++ b/yt_dlp/extractor/sexu.py @@ -15,7 +15,7 @@ class SexuIE(InfoExtractor): 'categories': list, # NSFW 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py index b31d566..6f2bfcd 100644 --- a/yt_dlp/extractor/seznamzpravy.py +++ b/yt_dlp/extractor/seznamzpravy.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlparse, -) from ..utils import ( int_or_none, parse_codecs, @@ -13,7 +11,7 @@ from ..utils import ( def _raw_id(src_url): - return compat_urllib_parse_urlparse(src_url).path.split('/')[-1] + return urllib.parse.urlparse(src_url).path.split('/')[-1] class SeznamZpravyIE(InfoExtractor): @@ -68,7 +66,7 @@ class SeznamZpravyIE(InfoExtractor): f = { 'url': urljoin(sdn_url, relative_url), - 'format_id': 'http-%s' % format_id, + 'format_id': f'http-{format_id}', 'tbr': int_or_none(format_data.get('bandwidth'), scale=1000), 'width': int_or_none(width), 'height': int_or_none(height), @@ -79,7 +77,7 @@ class SeznamZpravyIE(InfoExtractor): pls = sdn_data.get('pls', {}) def get_url(format_id): - return try_get(pls, lambda x: x[format_id]['url'], compat_str) + return try_get(pls, lambda x: x[format_id]['url'], str) dash_rel_url = get_url('dash') if dash_rel_url: diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py index 89aee27..f0a3b6b 100644 --- a/yt_dlp/extractor/shahid.py +++ b/yt_dlp/extractor/shahid.py 
@@ -63,17 +63,17 @@ class ShahidIE(ShahidBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'https://shahid.mbc.net/ar/movies/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9/movie-151746', - 'only_matching': True + 'only_matching': True, }, { # shahid plus subscriber only 'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319', - 'only_matching': True + 'only_matching': True, }] def _perform_login(self, username, password): @@ -84,7 +84,7 @@ class ShahidIE(ShahidBaseIE): 'email': username, 'password': password, 'basic': 'false', - }).encode('utf-8'), headers={ + }).encode(), headers={ 'Content-Type': 'application/json; charset=UTF-8', })['user'] except ExtractorError as e: @@ -127,7 +127,7 @@ class ShahidIE(ShahidBaseIE): # })['productModel'] response = self._download_json( - 'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id), + f'http://api.shahid.net/api/v1_1/{page_type}/{video_id}', video_id, 'Downloading video JSON', query={ 'apiKey': 'sh@hid0nlin3', 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=', @@ -136,7 +136,7 @@ class ShahidIE(ShahidBaseIE): error = data.get('error') if error: raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())), + '{} returned error: {}'.format(self.IE_NAME, '\n'.join(error.values())), expected=True) video = data[page_type] @@ -175,7 +175,7 @@ class ShahidShowIE(ShahidBaseIE): 'playlist_mincount': 32, }, { 'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861', - 'only_matching': True + 'only_matching': True, }] _PAGE_SIZE = 30 @@ -196,7 +196,7 @@ class ShahidShowIE(ShahidBaseIE): 'pageSize': 30, 'sorts': [{ 'order': 'DESC', - 'type': 'SORTDATE' + 'type': 
'SORTDATE', }], }) for product in playlist.get('productList', {}).get('products', []): diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py index cca86ed..284b2f8 100644 --- a/yt_dlp/extractor/shemaroome.py +++ b/yt_dlp/extractor/shemaroome.py @@ -1,8 +1,7 @@ +import base64 + from .common import InfoExtractor from ..aes import aes_cbc_decrypt, unpad_pkcs7 -from ..compat import ( - compat_b64decode, -) from ..utils import ( ExtractorError, bytes_to_intlist, @@ -24,8 +23,8 @@ class ShemarooMeIE(InfoExtractor): 'description': 'md5:2782c4127807103cf5a6ae2ca33645ce', }, 'params': { - 'skip_download': True - } + 'skip_download': True, + }, }, { 'url': 'https://www.shemaroome.com/shows/jurm-aur-jazbaat/laalach', 'info_dict': { @@ -37,9 +36,9 @@ class ShemarooMeIE(InfoExtractor): 'release_date': '20210507', }, 'params': { - 'skip_download': True + 'skip_download': True, }, - 'skip': 'Premium videos cannot be downloaded yet.' + 'skip': 'Premium videos cannot be downloaded yet.', }, { 'url': 'https://www.shemaroome.com/shows/jai-jai-jai-bajrang-bali/jai-jai-jai-bajrang-bali-episode-99', 'info_dict': { @@ -51,8 +50,8 @@ class ShemarooMeIE(InfoExtractor): 'release_date': '20110101', }, 'params': { - 'skip_download': True - } + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -69,8 +68,8 @@ class ShemarooMeIE(InfoExtractor): data_json = self._download_json('https://www.shemaroome.com/users/user_all_lists', video_id, data=data.encode()) if not data_json.get('status'): raise ExtractorError('Premium videos cannot be downloaded yet.', expected=True) - url_data = bytes_to_intlist(compat_b64decode(data_json['new_play_url'])) - key = bytes_to_intlist(compat_b64decode(data_json['key'])) + url_data = bytes_to_intlist(base64.b64decode(data_json['new_play_url'])) + key = bytes_to_intlist(base64.b64decode(data_json['key'])) iv = [0] * 16 m3u8_url = unpad_pkcs7(intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))).decode('ascii') headers = 
{'stream_key': data_json['stream_key']} diff --git a/yt_dlp/extractor/showroomlive.py b/yt_dlp/extractor/showroomlive.py index ab18953..303f0b3 100644 --- a/yt_dlp/extractor/showroomlive.py +++ b/yt_dlp/extractor/showroomlive.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ -24,18 +23,18 @@ class ShowRoomLiveIE(InfoExtractor): r'(?:profile|room)\?room_id\=(\d+)'), webpage, 'room_id') room = self._download_json( - urljoin(url, '/api/room/profile?room_id=%s' % room_id), + urljoin(url, f'/api/room/profile?room_id={room_id}'), broadcaster_id) is_live = room.get('is_onlive') if is_live is not True: - raise ExtractorError('%s is offline' % broadcaster_id, expected=True) + raise ExtractorError(f'{broadcaster_id} is offline', expected=True) uploader = room.get('performer_name') or broadcaster_id title = room.get('room_name') or room.get('main_name') or uploader streaming_url_list = self._download_json( - urljoin(url, '/api/live/streaming_url?room_id=%s' % room_id), + urljoin(url, f'/api/live/streaming_url?room_id={room_id}'), broadcaster_id)['streaming_url_list'] formats = [] @@ -68,7 +67,7 @@ class ShowRoomLiveIE(InfoExtractor): }) return { - 'id': compat_str(room.get('live_id') or broadcaster_id), + 'id': str(room.get('live_id') or broadcaster_id), 'title': title, 'description': room.get('description'), 'timestamp': int_or_none(room.get('current_live_started_at')), diff --git a/yt_dlp/extractor/sibnet.py b/yt_dlp/extractor/sibnet.py index 73bb75d..c3fb72e 100644 --- a/yt_dlp/extractor/sibnet.py +++ b/yt_dlp/extractor/sibnet.py @@ -8,10 +8,10 @@ class SibnetEmbedIE(InfoExtractor): _WEBPAGE_TESTS = [{ 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html', 'info_dict': { - 'id': 'shell', # FIXME? + 'id': 'shell', # FIXME: Non unique ID? 
'ext': 'mp4', 'age_limit': 0, 'thumbnail': 'https://video.sibnet.ru/upload/cover/video_1887072_0.jpg', 'title': 'КВН Москва не сразу строилась - Девушка впервые играет в Mortal Kombat', - } + }, }] diff --git a/yt_dlp/extractor/simplecast.py b/yt_dlp/extractor/simplecast.py index ec349dd..f6bb130 100644 --- a/yt_dlp/extractor/simplecast.py +++ b/yt_dlp/extractor/simplecast.py @@ -19,7 +19,7 @@ class SimplecastBaseIE(InfoExtractor): def _call_search_api(self, resource, resource_id, resource_url): return self._download_json( - 'https://api.simplecast.com/%ss/search' % resource, resource_id, + f'https://api.simplecast.com/{resource}s/search', resource_id, data=urlencode_postdata({'url': resource_url})) def _parse_episode(self, episode): @@ -33,7 +33,7 @@ class SimplecastBaseIE(InfoExtractor): season_id = None if season_href: season_id = self._search_regex( - r'https?://api.simplecast.com/seasons/(%s)' % self._UUID_REGEX, + rf'https?://api.simplecast.com/seasons/({self._UUID_REGEX})', season_href, 'season id', default=None) webpage_url = episode.get('episode_url') @@ -65,7 +65,7 @@ class SimplecastBaseIE(InfoExtractor): class SimplecastIE(SimplecastBaseIE): IE_NAME = 'simplecast' - _VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX + _VALID_URL = rf'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>{SimplecastBaseIE._UUID_REGEX})' _EMBED_REGEX = [rf'''(?x)<iframe[^>]+src=["\'] (?P<url>https?://(?: embed\.simplecast\.com/[0-9a-f]{8}| diff --git a/yt_dlp/extractor/sina.py b/yt_dlp/extractor/sina.py index eeb9ebb..974af1b 100644 --- a/yt_dlp/extractor/sina.py +++ b/yt_dlp/extractor/sina.py @@ -28,7 +28,7 @@ class SinaIE(InfoExtractor): 'id': '250576622', 'ext': 'mp4', 'title': '现场:克鲁兹宣布退选 特朗普将稳获提名', - } + }, }, { 'url': 'http://video.sina.com.cn/v/b/101314253-1290078633.html', @@ -66,8 +66,7 @@ class SinaIE(InfoExtractor): webpage = self._download_webpage(url, pseudo_id) 
error = get_element_by_attribute('class', 'errtitle', webpage) if error: - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, clean_html(error)), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {clean_html(error)}', expected=True) video_id = self._search_regex( r"video_id\s*:\s*'(\d+)'", webpage, 'video id') @@ -75,7 +74,7 @@ class SinaIE(InfoExtractor): 'http://s.video.sina.com.cn/video/h5play', video_id, query={'video_id': video_id}) if video_data['code'] != 1: - raise ExtractorError('%s said: %s' % ( + raise ExtractorError('{} said: {}'.format( self.IE_NAME, video_data['message']), expected=True) else: video_data = video_data['data'] diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py index 44619a1..6037a35 100644 --- a/yt_dlp/extractor/sixplay.py +++ b/yt_dlp/extractor/sixplay.py @@ -1,7 +1,4 @@ from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( determine_ext, int_or_none, @@ -44,9 +41,9 @@ class SixPlayIE(InfoExtractor): }.get(domain, ('6play', 'm6web')) data = self._download_json( - 'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/%s/videos/clip_%s' % (service, video_id), + f'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/{service}/videos/clip_{video_id}', video_id, headers={ - 'x-customer-name': consumer_name + 'x-customer-name': consumer_name, }, query={ 'csa': 5, 'with': 'clips', @@ -82,7 +79,7 @@ class SixPlayIE(InfoExtractor): asset_url = urlh.url asset_url = asset_url.replace('_drmnp.ism/', '_unpnp.ism/') for i in range(3, 0, -1): - asset_url = asset_url = asset_url.replace('_sd1/', '_sd%d/' % i) + asset_url = asset_url.replace('_sd1/', f'_sd{i}/') m3u8_formats = self._extract_m3u8_formats( asset_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) @@ -107,7 +104,7 @@ class SixPlayIE(InfoExtractor): def get(getter): for src in (data, clip_data): - v = try_get(src, getter, compat_str) + v = try_get(src, getter, str) 
if v: return v diff --git a/yt_dlp/extractor/skeb.py b/yt_dlp/extractor/skeb.py index 54dfdc4..bc5ec3d 100644 --- a/yt_dlp/extractor/skeb.py +++ b/yt_dlp/extractor/skeb.py @@ -20,8 +20,8 @@ class SkebIE(InfoExtractor): 'subtitles': { 'jpn': [{ 'url': r're:https://skeb.+', - 'ext': 'vtt' - }] + 'ext': 'vtt', + }], }, 'width': 720, 'height': 405, @@ -48,8 +48,8 @@ class SkebIE(InfoExtractor): 'subtitles': { 'jpn': [{ 'url': r're:https://skeb.+', - 'ext': 'vtt' - }] + 'ext': 'vtt', + }], }, 'duration': 98, 'ext': 'mp3', @@ -70,8 +70,8 @@ class SkebIE(InfoExtractor): }, { 'id': '486431', 'title': 'ヒロ。\n\n私のキャラク... by 諸々', - }] - } + }], + }, }] def _real_extract(self, url): @@ -106,7 +106,7 @@ class SkebIE(InfoExtractor): if width is not None and height is not None: # the longest side is at most 720px for non-client viewers max_size = max(width, height) - width, height = list(x * 720 // max_size for x in (width, height)) + width, height = (x * 720 // max_size for x in (width, height)) entries.append({ **parent, 'id': str(item['id']), @@ -116,7 +116,7 @@ class SkebIE(InfoExtractor): 'jpn': [{ 'url': item.get('vtt_url'), 'ext': 'vtt', - }] + }], } if item.get('vtt_url') else None, 'width': width, 'height': height, diff --git a/yt_dlp/extractor/sky.py b/yt_dlp/extractor/sky.py index 574ac21..5c9e4f3 100644 --- a/yt_dlp/extractor/sky.py +++ b/yt_dlp/extractor/sky.py @@ -94,7 +94,7 @@ class SkyNewsStoryIE(SkyBaseIE): 'upload_date': '20211027', 'timestamp': 1635317494, 'uploader_id': '6058004172001', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py index 42d30f7..6e29732 100644 --- a/yt_dlp/extractor/skyit.py +++ b/yt_dlp/extractor/skyit.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( dict_get, int_or_none, @@ -56,7 +54,7 @@ class SkyItPlayerIE(InfoExtractor): def _real_extract(self, url): 
video_id = self._match_id(url) - domain = compat_parse_qs(compat_urllib_parse_urlparse( + domain = urllib.parse.parse_qs(urllib.parse.urlparse( url).query).get('domain', [None])[0] token = dict_get(self._TOKEN_MAP, (domain, 'sky')) video = self._download_json( @@ -64,7 +62,7 @@ class SkyItPlayerIE(InfoExtractor): video_id, query={ 'caller': 'sky', 'id': video_id, - 'token': token + 'token': token, }, headers=self.geo_verification_headers()) return self._parse_video(video, video_id) diff --git a/yt_dlp/extractor/skylinewebcams.py b/yt_dlp/extractor/skylinewebcams.py index 197407c..102a2f9 100644 --- a/yt_dlp/extractor/skylinewebcams.py +++ b/yt_dlp/extractor/skylinewebcams.py @@ -15,7 +15,7 @@ class SkylineWebcamsIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py index 234703c..7ac2443 100644 --- a/yt_dlp/extractor/skynewsarabia.py +++ b/yt_dlp/extractor/skynewsarabia.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( parse_duration, parse_iso8601, @@ -10,7 +9,7 @@ class SkyNewsArabiaBaseIE(InfoExtractor): _IMAGE_BASE_URL = 'http://www.skynewsarabia.com/web/images' def _call_api(self, path, value): - return self._download_json('http://api.skynewsarabia.com/web/rest/v2/%s/%s.json' % (path, value), value) + return self._download_json(f'http://api.skynewsarabia.com/web/rest/v2/{path}/{value}.json', value) def _get_limelight_media_id(self, url): return self._search_regex(r'/media/[^/]+/([a-z0-9]{32})', url, 'limelight media id') @@ -19,11 +18,11 @@ class SkyNewsArabiaBaseIE(InfoExtractor): return self._IMAGE_BASE_URL + image_path_template.format(width=width, height=height) def _extract_video_info(self, video_data): - video_id = compat_str(video_data['id']) + video_id = str(video_data['id']) topic = video_data.get('topicTitle') return { '_type': 'url_transparent', - 'url': 
'limelight:media:%s' % self._get_limelight_media_id(video_data['videoUrl'][0]['url']), + 'url': 'limelight:media:{}'.format(self._get_limelight_media_id(video_data['videoUrl'][0]['url'])), 'id': video_id, 'title': video_data['headline'], 'description': video_data.get('summary'), @@ -32,7 +31,7 @@ class SkyNewsArabiaBaseIE(InfoExtractor): 'duration': parse_duration(video_data.get('runTime')), 'tags': video_data.get('tags', []), 'categories': [topic] if topic else [], - 'webpage_url': 'http://www.skynewsarabia.com/web/video/%s' % video_id, + 'webpage_url': f'http://www.skynewsarabia.com/web/video/{video_id}', 'ie_key': 'LimelightMedia', } @@ -101,7 +100,7 @@ class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE): topic = article_data.get('topicTitle') return { '_type': 'url_transparent', - 'url': 'limelight:media:%s' % self._get_limelight_media_id(media_asset['videoUrl'][0]['url']), + 'url': 'limelight:media:{}'.format(self._get_limelight_media_id(media_asset['videoUrl'][0]['url'])), 'id': article_id, 'title': article_data['headline'], 'description': article_data.get('summary'), diff --git a/yt_dlp/extractor/skynewsau.py b/yt_dlp/extractor/skynewsau.py index 43a9c82..617738a 100644 --- a/yt_dlp/extractor/skynewsau.py +++ b/yt_dlp/extractor/skynewsau.py @@ -22,21 +22,21 @@ class SkyNewsAUIE(InfoExtractor): 'tags': ['fblink', 'msn', 'usa', 'world', 'yt'], 'upload_date': '20211015', }, - 'params': {'skip_download': True, 'format': 'bv'} + 'params': {'skip_download': True, 'format': 'bv'}, }] _API_KEY = '6krsj3w249nk779d8fukqx9f' def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) embedcode = self._search_regex(r'embedcode\s?=\s?\"([^\"]+)\"', webpage, 'embedcode') data_json = self._download_json( - f'https://content.api.news/v3/videos/brightcove/{embedcode}?api_key={self._API_KEY}', id)['content'] + 
f'https://content.api.news/v3/videos/brightcove/{embedcode}?api_key={self._API_KEY}', video_id)['content'] return { - 'id': id, + 'id': video_id, '_type': 'url_transparent', - 'url': 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s' % tuple(embedcode.split('-')), + 'url': 'https://players.brightcove.net/{}/default_default/index.html?videoId={}'.format(*tuple(embedcode.split('-'))), 'ie_key': 'BrightcoveNew', 'title': data_json.get('caption'), 'upload_date': unified_strdate(try_get(data_json, lambda x: x['date']['created'])), diff --git a/yt_dlp/extractor/slideshare.py b/yt_dlp/extractor/slideshare.py index ab9dad0..302b6e1 100644 --- a/yt_dlp/extractor/slideshare.py +++ b/yt_dlp/extractor/slideshare.py @@ -1,9 +1,7 @@ import json +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_urlparse, -) from ..utils import ( ExtractorError, get_element_by_id, @@ -32,12 +30,12 @@ class SlideshareIE(InfoExtractor): webpage, 'slideshare object') info = json.loads(slideshare_obj) if info['slideshow']['type'] != 'video': - raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True) + raise ExtractorError('Webpage type is "{}": only video extraction is supported for Slideshare'.format(info['slideshow']['type']), expected=True) doc = info['doc'] bucket = info['jsplayer']['video_bucket'] ext = info['jsplayer']['video_extension'] - video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext) + video_url = urllib.parse.urljoin(bucket, doc + '-SD.' 
+ ext) description = get_element_by_id('slideshow-description-paragraph', webpage) or self._html_search_regex( r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage, 'description', fatal=False) diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py index a1328de..e684ac7 100644 --- a/yt_dlp/extractor/slideslive.py +++ b/yt_dlp/extractor/slideslive.py @@ -532,7 +532,7 @@ class SlidesLiveIE(InfoExtractor): }, note='Downloading video slides info', errnote='Failed to download video slides info') or {} for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...)), 1): - if not traverse_obj(slide, ('video', 'service')) == 'yoda': + if traverse_obj(slide, ('video', 'service')) != 'yoda': continue video_path = traverse_obj(slide, ('video', 'id')) cdn_hostname = traverse_obj(service_data, ( diff --git a/yt_dlp/extractor/slutload.py b/yt_dlp/extractor/slutload.py index 8e6e89c..c04898b 100644 --- a/yt_dlp/extractor/slutload.py +++ b/yt_dlp/extractor/slutload.py @@ -11,7 +11,7 @@ class SlutloadIE(InfoExtractor): 'ext': 'mp4', 'title': 'virginie baisee en cam', 'age_limit': 18, - 'thumbnail': r're:https?://.*?\.jpg' + 'thumbnail': r're:https?://.*?\.jpg', }, }, { # mobile site @@ -29,14 +29,14 @@ class SlutloadIE(InfoExtractor): video_id = self._match_id(url) embed_page = self._download_webpage( - 'http://www.slutload.com/embed_player/%s' % video_id, video_id, + f'http://www.slutload.com/embed_player/{video_id}', video_id, 'Downloading embed page', fatal=False) if embed_page: def extract(what): return self._html_search_regex( - r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what, - embed_page, 'video %s' % what, default=None, group='url') + rf'data-video-{what}=(["\'])(?P<url>(?:(?!\1).)+)\1', + embed_page, f'video {what}', default=None, group='url') video_url = extract('url') if video_url: @@ -47,11 +47,11 @@ class SlutloadIE(InfoExtractor): 'url': video_url, 'title': title, 'thumbnail': extract('preview'), - 'age_limit': 18 + 
'age_limit': 18, } webpage = self._download_webpage( - 'http://www.slutload.com/video/_/%s/' % video_id, video_id) + f'http://www.slutload.com/video/_/{video_id}/', video_id) title = self._html_search_regex( r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip() info = self._parse_html5_media_entries(url, webpage, video_id)[0] diff --git a/yt_dlp/extractor/snotr.py b/yt_dlp/extractor/snotr.py index 6889f19..859e5e8 100644 --- a/yt_dlp/extractor/snotr.py +++ b/yt_dlp/extractor/snotr.py @@ -30,7 +30,7 @@ class SnotrIE(InfoExtractor): 'filesize_approx': 8500000, 'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!', 'thumbnail': r're:^https?://.*\.jpg$', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/sohu.py b/yt_dlp/extractor/sohu.py index a41ad30..23faee1 100644 --- a/yt_dlp/extractor/sohu.py +++ b/yt_dlp/extractor/sohu.py @@ -1,11 +1,8 @@ import base64 import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) from ..utils import ( ExtractorError, float_or_none, @@ -51,7 +48,7 @@ class SohuIE(InfoExtractor): 'upload_date': '20150305', 'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg', 'tags': ['爱范儿', '爱范品', 'MWC', '手机'], - } + }, }, { 'note': 'Multipart video', 'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml', @@ -71,22 +68,22 @@ class SohuIE(InfoExtractor): 'ext': 'mp4', 'duration': 294, 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', - } + }, }, { 'info_dict': { 'id': '78910339_part2', 'ext': 'mp4', 'duration': 300, 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', - } + }, }, { 'info_dict': { 'id': '78910339_part3', 'ext': 'mp4', 'duration': 150, 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆', - } - }] + }, + }], }, { 'note': 'Video with title containing dash', 'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml', @@ -101,8 +98,8 @@ class SohuIE(InfoExtractor): 
'tags': [], }, 'params': { - 'skip_download': True - } + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -115,7 +112,7 @@ class SohuIE(InfoExtractor): return self._download_json( base_data_url + vid_id, video_id, - 'Downloading JSON data for %s' % vid_id, + f'Downloading JSON data for {vid_id}', headers=self.geo_verification_headers()) mobj = self._match_valid_url(url) @@ -133,18 +130,18 @@ class SohuIE(InfoExtractor): if vid_data['play'] != 1: if vid_data.get('status') == 12: raise ExtractorError( - '%s said: There\'s something wrong in the video.' % self.IE_NAME, + f'{self.IE_NAME} said: There\'s something wrong in the video.', expected=True) else: self.raise_geo_restricted( - '%s said: The video is only licensed to users in Mainland China.' % self.IE_NAME) + f'{self.IE_NAME} said: The video is only licensed to users in Mainland China.') formats_json = {} for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'): - vid_id = vid_data['data'].get('%sVid' % format_id) + vid_id = vid_data['data'].get(f'{format_id}Vid') if not vid_id: continue - vid_id = compat_str(vid_id) + vid_id = str(vid_id) formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv) part_count = vid_data['data']['totalBlocks'] @@ -162,7 +159,7 @@ class SohuIE(InfoExtractor): su = data['su'] video_url = 'newflv.sohu.ccgslb.net' - cdnId = None + cdn_id = None retries = 0 while 'newflv.sohu.ccgslb.net' in video_url: @@ -174,20 +171,19 @@ class SohuIE(InfoExtractor): 'rb': 1, } - if cdnId is not None: - params['idc'] = cdnId + if cdn_id is not None: + params['idc'] = cdn_id - download_note = 'Downloading %s video URL part %d of %d' % ( - format_id, i + 1, part_count) + download_note = f'Downloading {format_id} video URL part {i + 1} of {part_count}' if retries > 0: - download_note += ' (retry #%d)' % retries + download_note += f' (retry #{retries})' part_info = self._parse_json(self._download_webpage( - 'http://%s/?%s' % (allot, 
compat_urllib_parse_urlencode(params)), + f'http://{allot}/?{urllib.parse.urlencode(params)}', video_id, download_note), video_id) video_url = part_info['url'] - cdnId = part_info.get('nid') + cdn_id = part_info.get('nid') retries += 1 if retries > 5: @@ -204,7 +200,7 @@ class SohuIE(InfoExtractor): }) playlist.append({ - 'id': '%s_part%d' % (video_id, i + 1), + 'id': f'{video_id}_part{i + 1}', 'title': title, 'duration': vid_data['data']['clipsDuration'][i], 'formats': formats, @@ -269,7 +265,7 @@ class SohuVIE(InfoExtractor): 'upload_date': '20150305', 'thumbnail': 'http://e3f49eaa46b57.cdn.sohucs.com//group1/M10/83/FA/MTAuMTAuODguODA=/6_14cbccdde5eg104SysCutcloud_78693464_7_0b.jpg', 'tags': ['爱范儿', '爱范品', 'MWC', '手机'], - } + }, }, { 'note': 'Multipart video', 'url': 'https://tv.sohu.com/v/dXMvMjQyNTYyMTYzLzc4OTEwMzM5LnNodG1s.html?src=pl', diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py index 7c914ac..a0a051e 100644 --- a/yt_dlp/extractor/sonyliv.py +++ b/yt_dlp/extractor/sonyliv.py @@ -73,7 +73,7 @@ class SonyLIVIE(InfoExtractor): if c == 'x': t[i] = str(n) elif c == 'y': - t[i] = '{:x}'.format(3 & n | 8) + t[i] = f'{3 & n | 8:x}' return ''.join(t) + '-' + str(int(time.time() * 1000)) def _perform_login(self, username, password): @@ -121,7 +121,7 @@ class SonyLIVIE(InfoExtractor): def _call_api(self, version, path, video_id): try: return self._download_json( - 'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path), + f'https://apiv2.sonyliv.com/AGL/{version}/A/ENG/WEB/{path}', video_id, headers=self._HEADERS)['resultObj'] except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 406 and self._parse_json( @@ -146,7 +146,7 @@ class SonyLIVIE(InfoExtractor): self.report_drm(video_id) dash_url = content['videoURL'] headers = { - 'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000) + 'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000), } formats = 
self._extract_mpd_formats( dash_url, video_id, mpd_id='dash', headers=headers, fatal=False) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 3581461..0c6f0b0 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -4,13 +4,11 @@ import json import re from .common import InfoExtractor, SearchInfoExtractor -from ..compat import compat_str from ..networking import HEADRequest from ..networking.exceptions import HTTPError from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, - error_to_compat_str, float_or_none, int_or_none, join_nonempty, @@ -97,7 +95,7 @@ class SoundcloudBaseIE(InfoExtractor): return raise ExtractorError('Unable to extract client id') - def _download_json(self, *args, **kwargs): + def _call_api(self, *args, **kwargs): non_fatal = kwargs.get('fatal') is False if non_fatal: del kwargs['fatal'] @@ -106,14 +104,14 @@ class SoundcloudBaseIE(InfoExtractor): query['client_id'] = self._CLIENT_ID kwargs['query'] = query try: - return super()._download_json(*args, **kwargs) + return self._download_json(*args, **kwargs) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403): self._store_client_id(None) self._update_client_id() continue elif non_fatal: - self.report_warning(error_to_compat_str(e)) + self.report_warning(str(e)) return False raise @@ -165,7 +163,7 @@ class SoundcloudBaseIE(InfoExtractor): 'user_agent': self._USER_AGENT } - response = self._download_json( + response = self._call_api( self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID), None, note='Verifying login token...', fatal=False, data=json.dumps(payload).encode()) @@ -196,24 +194,20 @@ class SoundcloudBaseIE(InfoExtractor): t = clid # _CLIENT_ID d = '-'.join([str(mInt) for mInt in [a, i, s, w, u, l, b, k]]) - p = n + y + d + r + e + t + d + n - h = p + h = n + y + d + r + e + t + d + n m = 8011470 - f = 0 - for f in range(f, len(h)): + for f in range(len(h)): m = (m 
>> 1) + ((1 & m) << 23) m += ord(h[f]) m &= 16777215 # c is not even needed - out = str(y) + ':' + str(d) + ':' + format(m, 'x') + ':' + str(c) - - return out + return f'{y}:{d}:{m:x}:{c}' def _extract_info_dict(self, info, full_title=None, secret_token=None, extract_flat=False): - track_id = compat_str(info['id']) + track_id = str(info['id']) title = info['title'] format_urls = set() @@ -223,12 +217,26 @@ class SoundcloudBaseIE(InfoExtractor): query['secret_token'] = secret_token if not extract_flat and info.get('downloadable') and info.get('has_downloads_left'): - download_url = update_url_query( - self._API_V2_BASE + 'tracks/' + track_id + '/download', query) - redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri') - if redirect_url: + try: + # Do not use _call_api(); HTTP Error codes have different meanings for this request + download_data = self._download_json( + f'{self._API_V2_BASE}tracks/{track_id}/download', track_id, + 'Downloading original download format info JSON', query=query, headers=self._HEADERS) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + self.report_warning( + 'Original download format is only available ' + f'for registered users. 
{self._login_hint()}') + elif isinstance(e.cause, HTTPError) and e.cause.status == 403: + self.write_debug('Original download format is not available for this client') + else: + self.report_warning(e.msg) + download_data = None + + if redirect_url := traverse_obj(download_data, ('redirectUri', {url_or_none})): urlh = self._request_webpage( - HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False) + HEADRequest(redirect_url), track_id, 'Checking original download format availability', + 'Original download format is not available', fatal=False) if urlh: format_url = urlh.url format_urls.add(format_url) @@ -309,7 +317,7 @@ class SoundcloudBaseIE(InfoExtractor): stream = None for retry in self.RetryManager(fatal=False): try: - stream = self._download_json( + stream = self._call_api( format_url, track_id, f'Downloading {identifier} format info JSON', query=query, headers=self._HEADERS) except ExtractorError as e: @@ -342,12 +350,12 @@ class SoundcloudBaseIE(InfoExtractor): thumbnails = [] artwork_url = info.get('artwork_url') thumbnail = artwork_url or user.get('avatar_url') - if isinstance(thumbnail, compat_str): + if isinstance(thumbnail, str): if re.search(self._IMAGE_REPL_RE, thumbnail): for image_id, size in self._ARTWORK_MAP.items(): i = { 'id': image_id, - 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail), + 'url': re.sub(self._IMAGE_REPL_RE, f'-{image_id}.jpg', thumbnail), } if image_id == 'tiny' and not artwork_url: size = 18 @@ -363,7 +371,7 @@ class SoundcloudBaseIE(InfoExtractor): thumbnails = [{'url': thumbnail}] def extract_count(key): - return int_or_none(info.get('%s_count' % key)) + return int_or_none(info.get(f'{key}_count')) return { 'id': track_id, @@ -382,7 +390,7 @@ class SoundcloudBaseIE(InfoExtractor): 'comment_count': extract_count('comment'), 'repost_count': extract_count('reposts'), 'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)), - 'formats': formats if not 
extract_flat else None + 'formats': formats if not extract_flat else None, } @classmethod @@ -434,7 +442,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg', 'uploader_url': 'https://soundcloud.com/ethmusic', 'genres': [], - } + }, }, # geo-restricted { @@ -467,7 +475,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'id': '123998367', 'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'description': 'test chars: \"\'/\\ä↭', + 'description': 'test chars: "\'/\\ä↭', 'uploader': 'jaimeMF', 'uploader_id': '69767071', 'timestamp': 1386604920, @@ -491,7 +499,7 @@ class SoundcloudIE(SoundcloudBaseIE): 'id': '123998367', 'ext': 'mp3', 'title': 'Youtube - Dl Test Video \'\' Ä↭', - 'description': 'test chars: \"\'/\\ä↭', + 'description': 'test chars: "\'/\\ä↭', 'uploader': 'jaimeMF', 'uploader_id': '69767071', 'timestamp': 1386604920, @@ -630,13 +638,13 @@ class SoundcloudIE(SoundcloudBaseIE): if token: query['secret_token'] = token else: - full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title') + full_title = resolve_title = '{}/{}'.format(*mobj.group('uploader', 'title')) token = mobj.group('token') if token: - resolve_title += '/%s' % token + resolve_title += f'/{token}' info_json_url = self._resolv_url(self._BASE_URL + resolve_title) - info = self._download_json( + info = self._call_api( info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS) return self._extract_info_dict(info, full_title, token) @@ -644,13 +652,13 @@ class SoundcloudIE(SoundcloudBaseIE): class SoundcloudPlaylistBaseIE(SoundcloudBaseIE): def _extract_set(self, playlist, token=None): - playlist_id = compat_str(playlist['id']) + playlist_id = str(playlist['id']) tracks = playlist.get('tracks') or [] - if not all([t.get('permalink_url') for t in tracks]) and token: - tracks = self._download_json( + if not all(t.get('permalink_url') for t in tracks) and token: + tracks = self._call_api( 
self._API_V2_BASE + 'tracks', playlist_id, 'Downloading tracks', query={ - 'ids': ','.join([compat_str(t['id']) for t in tracks]), + 'ids': ','.join([str(t['id']) for t in tracks]), 'playlistId': playlist_id, 'playlistSecretToken': token, }, headers=self._HEADERS) @@ -700,17 +708,17 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): def _real_extract(self, url): mobj = self._match_valid_url(url) - full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title') + full_title = '{}/sets/{}'.format(*mobj.group('uploader', 'slug_title')) token = mobj.group('token') if token: full_title += '/' + token - info = self._download_json(self._resolv_url( + info = self._call_api(self._resolv_url( self._BASE_URL + full_title), full_title, headers=self._HEADERS) if 'errors' in info: - msgs = (compat_str(err['error_message']) for err in info['errors']) - raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) + msgs = (str(err['error_message']) for err in info['errors']) + raise ExtractorError('unable to download video webpage: {}'.format(','.join(msgs))) return self._extract_set(info, token) @@ -736,7 +744,7 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE): for i in itertools.count(): for retry in self.RetryManager(): try: - response = self._download_json( + response = self._call_api( url, playlist_id, query=query, headers=self._HEADERS, note=f'Downloading track page {i + 1}') break @@ -844,7 +852,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): mobj = self._match_valid_url(url) uploader = mobj.group('user') - user = self._download_json( + user = self._call_api( self._resolv_url(self._BASE_URL + uploader), uploader, 'Downloading user info', headers=self._HEADERS) @@ -853,7 +861,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): return self._extract_playlist( self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'], str_or_none(user.get('id')), - '%s (%s)' % (user['username'], resource.capitalize())) + '{} 
({})'.format(user['username'], resource.capitalize())) class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE): @@ -870,7 +878,7 @@ class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): user_id = self._match_id(url) - user = self._download_json( + user = self._call_api( self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS) return self._extract_playlist( @@ -892,13 +900,13 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): track_name = self._match_id(url) - track = self._download_json(self._resolv_url(url), track_name, headers=self._HEADERS) + track = self._call_api(self._resolv_url(url), track_name, headers=self._HEADERS) track_id = self._search_regex( r'soundcloud:track-stations:(\d+)', track['id'], 'track id') return self._extract_playlist( - self._API_V2_BASE + 'stations/%s/tracks' % track['id'], - track_id, 'Track station: %s' % track['title']) + self._API_V2_BASE + 'stations/{}/tracks'.format(track['id']), + track_id, 'Track station: {}'.format(track['title'])) class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE): @@ -936,7 +944,7 @@ class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE): def _real_extract(self, url): slug, relation = self._match_valid_url(url).group('slug', 'relation') - track = self._download_json( + track = self._call_api( self._resolv_url(self._BASE_URL + slug), slug, 'Downloading track info', headers=self._HEADERS) @@ -946,7 +954,7 @@ class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE): return self._extract_playlist( self._API_V2_BASE + self._BASE_URL_MAP[relation] % track['id'], str(track['id']), - '%s (%s)' % (track.get('title') or slug, relation.capitalize())) + '{} ({})'.format(track.get('title') or slug, relation.capitalize())) class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): @@ -971,7 +979,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): if token: query['secret_token'] = token - data = 
self._download_json( + data = self._call_api( self._API_V2_BASE + 'playlists/' + playlist_id, playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS) @@ -1006,7 +1014,7 @@ class SoundcloudSearchIE(SoundcloudBaseIE, SearchInfoExtractor): next_url = update_url_query(self._API_V2_BASE + endpoint, query) for i in itertools.count(1): - response = self._download_json( + response = self._call_api( next_url, collection_id, f'Downloading page {i}', 'Unable to download API page', headers=self._HEADERS) diff --git a/yt_dlp/extractor/soundgasm.py b/yt_dlp/extractor/soundgasm.py index 9e59c7c..b524e24 100644 --- a/yt_dlp/extractor/soundgasm.py +++ b/yt_dlp/extractor/soundgasm.py @@ -15,7 +15,7 @@ class SoundgasmIE(InfoExtractor): 'title': 'Piano sample', 'description': 'Royalty Free Sample Music', 'uploader': 'ytdl', - } + }, } def _real_extract(self, url): @@ -69,6 +69,6 @@ class SoundgasmProfileIE(InfoExtractor): entries = [ self.url_result(audio_url, 'Soundgasm') - for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)] + for audio_url in re.findall(rf'href="([^"]+/u/{profile_id}/[^"]+)', webpage)] return self.playlist_result(entries, profile_id) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index e23f192..3d661a8 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -87,7 +87,7 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30', 'ext': 'mp4', 'title': 'Zahnfee Cartman', - 'description': 'md5:b917eec991d388811d911fd1377671ac' + 'description': 'md5:b917eec991d388811d911fd1377671ac', }, }, { # episode @@ -102,7 +102,7 @@ class SouthParkDeIE(SouthParkIE): # XXX: Do not subclass from concrete IE def _get_feed_url(self, uri, url=None): video_id = self._id_from_uri(uri) config = self._download_json( - 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge&ref=%s' % (uri, url), video_id) + 
f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}', video_id) return self._remove_template_parameter(config['feedWithQueryParams']) def _get_feed_query(self, uri): diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py index c73f797..6805a72 100644 --- a/yt_dlp/extractor/spankbang.py +++ b/yt_dlp/extractor/spankbang.py @@ -37,7 +37,7 @@ class SpankBangIE(InfoExtractor): 'timestamp': 1617109572, 'upload_date': '20210330', 'age_limit': 18, - } + }, }, { # 480p only 'url': 'http://spankbang.com/1vt0/video/solvane+gangbang', @@ -72,12 +72,12 @@ class SpankBangIE(InfoExtractor): mobj = self._match_valid_url(url) video_id = mobj.group('id') or mobj.group('id_2') webpage = self._download_webpage( - url.replace('/%s/embed' % video_id, '/%s/video' % video_id), + url.replace(f'/{video_id}/embed', f'/{video_id}/video'), video_id, headers={'Cookie': 'country=US'}) if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage): raise ExtractorError( - 'Video %s is not available' % video_id, expected=True) + f'Video {video_id} is not available', expected=True) formats = [] @@ -104,8 +104,7 @@ class SpankBangIE(InfoExtractor): STREAM_URL_PREFIX = 'stream_url_' for mobj in re.finditer( - r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2' - % STREAM_URL_PREFIX, webpage): + rf'{STREAM_URL_PREFIX}(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2', webpage): extract_format(mobj.group('id', 'url')) if not formats: @@ -159,7 +158,7 @@ class SpankBangIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': age_limit, - }, info + }, info, ) diff --git a/yt_dlp/extractor/spiegel.py b/yt_dlp/extractor/spiegel.py index 3701e29..0397f83 100644 --- a/yt_dlp/extractor/spiegel.py +++ b/yt_dlp/extractor/spiegel.py @@ -4,7 +4,7 @@ from .jwplatform import JWPlatformIE class SpiegelIE(InfoExtractor): _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' - _VALID_URL = 
r'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|%s)(?:-embed|-iframe)?(?:\.html)?(?:$|[#?])' % _UUID_RE + _VALID_URL = rf'https?://(?:www\.)?(?:spiegel|manager-magazin)\.de(?:/[^/]+)+/[^/]*-(?P<id>[0-9]+|{_UUID_RE})(?:-embed|-iframe)?(?:\.html)?(?:$|[#?])' _TESTS = [{ 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', 'md5': '50c7948883ec85a3e431a0a44b7ad1d6', @@ -45,7 +45,7 @@ class SpiegelIE(InfoExtractor): '_type': 'url_transparent', 'id': video_id, 'display_id': video_id, - 'url': 'jwplatform:%s' % media_id, + 'url': f'jwplatform:{media_id}', 'title': self._og_search_title(webpage, default=None), 'ie_key': JWPlatformIE.ie_key(), } diff --git a/yt_dlp/extractor/sport5.py b/yt_dlp/extractor/sport5.py index 44b4067..6c45c3e 100644 --- a/yt_dlp/extractor/sport5.py +++ b/yt_dlp/extractor/sport5.py @@ -27,7 +27,7 @@ class Sport5IE(InfoExtractor): 'categories': list, }, 'skip': 'Blocked outside of Israel', - } + }, ] def _real_extract(self, url): @@ -39,13 +39,13 @@ class Sport5IE(InfoExtractor): video_id = self._html_search_regex(r'clipId=([\w-]+)', webpage, 'video id') metadata = self._download_xml( - 'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id, + f'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/{video_id}/HDS/metadata.xml', video_id) error = metadata.find('./Error') if error is not None: raise ExtractorError( - '%s returned error: %s - %s' % ( + '{} returned error: {} - {}'.format( self.IE_NAME, error.find('./Name').text, error.find('./Description').text), diff --git a/yt_dlp/extractor/sportdeutschland.py b/yt_dlp/extractor/sportdeutschland.py index 30dbcf3..2d6acb8 100644 --- a/yt_dlp/extractor/sportdeutschland.py +++ b/yt_dlp/extractor/sportdeutschland.py @@ -24,7 +24,7 @@ class SportDeutschlandIE(InfoExtractor): 'duration': 32447, 'upload_date': '20230114', 'timestamp': 1673733618, - } + }, }, { 'url': 
'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0', 'info_dict': { @@ -40,7 +40,7 @@ class SportDeutschlandIE(InfoExtractor): 'duration': 41097, 'upload_date': '20220309', 'timestamp': 1646860727.0, - } + }, }, { 'url': 'https://sportdeutschland.tv/ggcbremen/formationswochenende-latein-2023', 'info_dict': { @@ -66,8 +66,8 @@ class SportDeutschlandIE(InfoExtractor): 'upload_date': '20230225', 'timestamp': 1677349909, 'live_status': 'was_live', - } - }] + }, + }], }, { 'url': 'https://sportdeutschland.tv/dtb/gymnastik-international-tag-1', 'info_dict': { @@ -99,7 +99,7 @@ class SportDeutschlandIE(InfoExtractor): **traverse_obj(video, { 'id': 'id', 'duration': ('duration', {lambda x: float(x) > 0 and float(x)}), - 'timestamp': ('created_at', {unified_timestamp}) + 'timestamp': ('created_at', {unified_timestamp}), }), } @@ -120,7 +120,7 @@ class SportDeutschlandIE(InfoExtractor): 'is_live': 'currently_live', 'was_live': 'was_live', 'channel_url': ('profile', 'slug', {lambda x: f'https://sportdeutschland.tv/{x}'}), - }, get_all=False) + }, get_all=False), } parts = traverse_obj(meta, (('livestream', ('videos', ...)), )) diff --git a/yt_dlp/extractor/spotify.py b/yt_dlp/extractor/spotify.py index 55ce36a..de67a61 100644 --- a/yt_dlp/extractor/spotify.py +++ b/yt_dlp/extractor/spotify.py @@ -39,7 +39,7 @@ class SpotifyBaseIE(InfoExtractor): 'persistedQuery': { 'sha256Hash': self._OPERATION_HASHES[operation], }, - }) + }), }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN}, **kwargs)['data'] @@ -115,7 +115,7 @@ class SpotifyIE(SpotifyBaseIE): 'duration': 2083.605, 'release_date': '20201217', 'series': "The Guardian's Audio Long Reads", - } + }, }, { 'url': 'https://open.spotify.com/embed/episode/4TvCsKKs2thXmarHigWvXE?si=7eatS8AbQb6RxqO2raIuWA', 'only_matching': True, @@ -124,7 +124,7 @@ class SpotifyIE(SpotifyBaseIE): def _real_extract(self, url): episode_id = self._match_id(url) episode = 
self._call_api('Episode', episode_id, { - 'uri': 'spotify:episode:' + episode_id + 'uri': 'spotify:episode:' + episode_id, })['episode'] return self._extract_episode( episode, try_get(episode, lambda x: x['podcast']['name'])) diff --git a/yt_dlp/extractor/spreaker.py b/yt_dlp/extractor/spreaker.py index 36a9bd2..d1df459 100644 --- a/yt_dlp/extractor/spreaker.py +++ b/yt_dlp/extractor/spreaker.py @@ -1,7 +1,6 @@ import itertools from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( float_or_none, int_or_none, @@ -16,26 +15,26 @@ def _extract_episode(data, episode_id=None): title = data['title'] download_url = data['download_url'] - series = try_get(data, lambda x: x['show']['title'], compat_str) - uploader = try_get(data, lambda x: x['author']['fullname'], compat_str) + series = try_get(data, lambda x: x['show']['title'], str) + uploader = try_get(data, lambda x: x['author']['fullname'], str) thumbnails = [] for image in ('image_original', 'image_medium', 'image'): - image_url = url_or_none(data.get('%s_url' % image)) + image_url = url_or_none(data.get(f'{image}_url')) if image_url: thumbnails.append({'url': image_url}) def stats(key): return int_or_none(try_get( data, - (lambda x: x['%ss_count' % key], - lambda x: x['stats']['%ss' % key]))) + (lambda x: x[f'{key}s_count'], + lambda x: x['stats'][f'{key}s']))) def duration(key): return float_or_none(data.get(key), scale=1000) return { - 'id': compat_str(episode_id or data['episode_id']), + 'id': str(episode_id or data['episode_id']), 'url': download_url, 'display_id': data.get('permalink'), 'title': title, @@ -97,7 +96,7 @@ class SpreakerIE(InfoExtractor): def _real_extract(self, url): episode_id = self._match_id(url) data = self._download_json( - 'https://api.spreaker.com/v2/episodes/%s' % episode_id, + f'https://api.spreaker.com/v2/episodes/{episode_id}', episode_id)['response']['episode'] return _extract_episode(data, episode_id) @@ -116,7 +115,7 @@ class 
SpreakerPageIE(InfoExtractor): (r'data-episode_id=["\'](?P<id>\d+)', r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id') return self.url_result( - 'https://api.spreaker.com/episode/%s' % episode_id, + f'https://api.spreaker.com/episode/{episode_id}', ie=SpreakerIE.ie_key(), video_id=episode_id) @@ -133,8 +132,8 @@ class SpreakerShowIE(InfoExtractor): def _entries(self, show_id): for page_num in itertools.count(1): episodes = self._download_json( - 'https://api.spreaker.com/show/%s/episodes' % show_id, - show_id, note='Downloading JSON page %d' % page_num, query={ + f'https://api.spreaker.com/show/{show_id}/episodes', + show_id, note=f'Downloading JSON page {page_num}', query={ 'page': page_num, 'max_per_page': 100, }) @@ -169,5 +168,5 @@ class SpreakerShowPageIE(InfoExtractor): show_id = self._search_regex( r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id') return self.url_result( - 'https://api.spreaker.com/show/%s' % show_id, + f'https://api.spreaker.com/show/{show_id}', ie=SpreakerShowIE.ie_key(), video_id=show_id) diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py index bdb8ef4..cd3261d 100644 --- a/yt_dlp/extractor/springboardplatform.py +++ b/yt_dlp/extractor/springboardplatform.py @@ -52,8 +52,7 @@ class SpringboardPlatformIE(InfoExtractor): index = mobj.group('index') or mobj.group('index_2') video = self._download_xml( - 'http://cms.springboardplatform.com/xml_feeds_advanced/index/%s/rss3/%s' - % (index, video_id), video_id) + f'http://cms.springboardplatform.com/xml_feeds_advanced/index/{index}/rss3/{video_id}', video_id) item = xpath_element(video, './/item', 'item', fatal=True) @@ -66,7 +65,7 @@ class SpringboardPlatformIE(InfoExtractor): if 'error_video.mp4' in video_url: raise ExtractorError( - 'Video %s no longer exists' % video_id, expected=True) + f'Video {video_id} no longer exists', expected=True) duration = int_or_none(content.get('duration')) tbr = int_or_none(content.get('bitrate')) diff --git 
a/yt_dlp/extractor/sproutvideo.py b/yt_dlp/extractor/sproutvideo.py new file mode 100644 index 0000000..c092359 --- /dev/null +++ b/yt_dlp/extractor/sproutvideo.py @@ -0,0 +1,198 @@ +import base64 +import urllib.parse + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + int_or_none, + qualities, + remove_start, + smuggle_url, + unsmuggle_url, + update_url_query, + url_or_none, + urlencode_postdata, +) +from ..utils.traversal import traverse_obj + + +class SproutVideoIE(InfoExtractor): + _NO_SCHEME_RE = r'//videos\.sproutvideo\.com/embed/(?P<id>[\da-f]+)/[\da-f]+' + _VALID_URL = rf'https?:{_NO_SCHEME_RE}' + _EMBED_REGEX = [rf'<iframe [^>]*\bsrc=["\'](?P<url>(?:https?:)?{_NO_SCHEME_RE}[^"\']*)["\']'] + _TESTS = [{ + 'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3', + 'md5': '1343ce1a6cb39d67889bfa07c7b02b0e', + 'info_dict': { + 'id': '4c9dddb01910e3c9c4', + 'ext': 'mp4', + 'title': 'Adrien Labaeye : Berlin, des communautés aux communs', + 'duration': 576, + 'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg', + }, + }, { + 'url': 'https://videos.sproutvideo.com/embed/a79fdcb21f1be2c62e/93bf31e41e39ca27', + 'md5': 'cebae5cf558cca83271917cf4ec03f26', + 'info_dict': { + 'id': 'a79fdcb21f1be2c62e', + 'ext': 'mp4', + 'title': 'HS_01_Live Stream 2023-01-14 10:00', + 'duration': 703, + 'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg', + }, + }, { + # http formats 'sd' and 'hd' are available + 'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90', + 'md5': 'f368c78df07e78a749508b221528672c', + 'info_dict': { + 'id': '119cd6bc1a18e6cd98', + 'ext': 'mp4', + 'title': '3. 
Updating your Partner details', + 'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg', + 'duration': 60, + }, + 'params': {'format': 'hd'}, + }, { + # subtitles + 'url': 'https://videos.sproutvideo.com/embed/119dd8ba121ee0cc98/4ee50c88a343215d?type=hd', + 'md5': '7f6798f037d7a3e3e07e67959de68fc6', + 'info_dict': { + 'id': '119dd8ba121ee0cc98', + 'ext': 'mp4', + 'title': 'Recipients Setup - Domestic Wire Only', + 'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg', + 'duration': 77, + 'subtitles': {'en': 'count:1'}, + }, + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs', + 'info_dict': { + 'id': '4c9dddb01910e3c9c4', + 'ext': 'mp4', + 'title': 'Adrien Labaeye : Berlin, des communautés aux communs', + 'duration': 576, + 'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg', + }, + }] + _M3U8_URL_TMPL = 'https://{base}.videos.sproutvideo.com/{s3_user_hash}/{s3_video_hash}/video/index.m3u8' + _QUALITIES = ('hd', 'uhd', 'source') # Exclude 'sd' to prioritize hls formats above it + + @staticmethod + def _policy_to_qs(policy, signature_key, as_string=False): + query = {} + for key, value in policy['signatures'][signature_key].items(): + query[remove_start(key, 'CloudFront-')] = value + query['sessionID'] = policy['sessionID'] + return urllib.parse.urlencode(query, doseq=True) if as_string else query + + @classmethod + def _extract_embed_urls(cls, url, webpage): + for embed_url in super()._extract_embed_urls(url, webpage): + if embed_url.startswith('//'): + embed_url = f'https:{embed_url}' + yield smuggle_url(embed_url, {'referer': url}) + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + video_id = self._match_id(url) + webpage = self._download_webpage( + url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'})) + data = self._search_json( + r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, 
contains_pattern=r'[A-Za-z0-9+/=]+', + end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode()) + + formats, subtitles = [], {} + headers = { + 'Accept': '*/*', + 'Origin': 'https://videos.sproutvideo.com', + 'Referer': url, + } + + # HLS extraction is fatal; only attempt it if the JSON data says it's available + if traverse_obj(data, 'hls'): + manifest_query = self._policy_to_qs(data, 'm') + fragment_query = self._policy_to_qs(data, 't', as_string=True) + key_query = self._policy_to_qs(data, 'k', as_string=True) + + formats.extend(self._extract_m3u8_formats( + self._M3U8_URL_TMPL.format(**data), video_id, 'mp4', + m3u8_id='hls', headers=headers, query=manifest_query)) + for fmt in formats: + fmt.update({ + 'url': update_url_query(fmt['url'], manifest_query), + 'extra_param_to_segment_url': fragment_query, + 'extra_param_to_key_url': key_query, + }) + + if downloads := traverse_obj(data, ('downloads', {dict.items}, lambda _, v: url_or_none(v[1]))): + quality = qualities(self._QUALITIES) + acodec = 'none' if data.get('has_audio') is False else None + formats.extend([{ + 'format_id': str(format_id), + 'url': format_url, + 'ext': 'mp4', + 'quality': quality(format_id), + 'acodec': acodec, + } for format_id, format_url in downloads]) + + for sub_data in traverse_obj(data, ('subtitleData', lambda _, v: url_or_none(v['src']))): + subtitles.setdefault(sub_data.get('srclang', 'en'), []).append({ + 'url': sub_data['src'], + }) + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'http_headers': headers, + **traverse_obj(data, { + 'title': ('title', {str}), + 'duration': ('duration', {int_or_none}), + 'thumbnail': ('posterframe_url', {url_or_none}), + }), + } + + +class VidsIoIE(InfoExtractor): + IE_NAME = 'vids.io' + _VALID_URL = r'https?://[\w-]+\.vids\.io/videos/(?P<id>[\da-f]+)/(?P<display_id>[\w-]+)' + _TESTS = [{ + 'url': 'https://how-to-video.vids.io/videos/799cd8b11c10efc1f0/how-to-video-live-streaming', + 'md5': 
'9bbbb2c0c0739eb163b80f87b8d77c9e', + 'info_dict': { + 'id': '799cd8b11c10efc1f0', + 'ext': 'mp4', + 'title': 'How to Video: Live Streaming', + 'duration': 2787, + 'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg', + }, + }] + + def _real_extract(self, url): + video_id, display_id = self._match_valid_url(url).group('id', 'display_id') + webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=403) + + if urlh.status == 403: + password = self.get_param('videopassword') + if not password: + raise ExtractorError( + 'This video is password-protected; use the --video-password option', expected=True) + try: + webpage = self._download_webpage( + url, display_id, 'Submitting video password', + data=urlencode_postdata({ + 'password': password, + **self._hidden_inputs(webpage), + })) + # Requests with user's session cookie `_sproutvideo_session` are now authorized + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 403: + raise ExtractorError('Incorrect password', expected=True) + raise + + if embed_url := next(SproutVideoIE._extract_embed_urls(url, webpage), None): + return self.url_result(embed_url, SproutVideoIE, video_id) + + raise ExtractorError('Unable to extract any SproutVideo embed url') diff --git a/yt_dlp/extractor/srgssr.py b/yt_dlp/extractor/srgssr.py index 145f25e..c01fd12 100644 --- a/yt_dlp/extractor/srgssr.py +++ b/yt_dlp/extractor/srgssr.py @@ -48,7 +48,7 @@ class SRGSSRIE(InfoExtractor): def _get_tokenized_src(self, url, video_id, format_id): token = self._download_json( 'http://tp.srgssr.ch/akahd/token?acl=*', - video_id, 'Downloading %s token' % format_id, fatal=False) or {} + video_id, f'Downloading {format_id} token', fatal=False) or {} auth_params = try_get(token, lambda x: x['token']['authparams']) if auth_params: url += ('?' if '?' 
not in url else '&') + auth_params @@ -57,8 +57,7 @@ class SRGSSRIE(InfoExtractor): def _get_media_data(self, bu, media_type, media_id): query = {'onlyChapters': True} if media_type == 'video' else {} full_media_data = self._download_json( - 'https://il.srgssr.ch/integrationlayer/2.0/%s/mediaComposition/%s/%s.json' - % (bu, media_type, media_id), + f'https://il.srgssr.ch/integrationlayer/2.0/{bu}/mediaComposition/{media_type}/{media_id}.json', media_id, query=query)['chapterList'] try: media_data = next( @@ -73,7 +72,7 @@ class SRGSSRIE(InfoExtractor): self.raise_geo_restricted( msg=message, countries=self._GEO_COUNTRIES) raise ExtractorError( - '%s said: %s' % (self.IE_NAME, message), expected=True) + f'{self.IE_NAME} said: {message}', expected=True) return media_data @@ -119,7 +118,7 @@ class SRGSSRIE(InfoExtractor): # whole episode. if int_or_none(media_data.get('position')) == 0: for p in ('S', 'H'): - podcast_url = media_data.get('podcast%sdUrl' % p) + podcast_url = media_data.get(f'podcast{p}dUrl') if not podcast_url: continue quality = p + 'D' @@ -207,7 +206,7 @@ class SRGSSRPlayIE(InfoExtractor): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'http://play.swissinfo.ch/play/tv/business/video/why-people-were-against-tax-reforms?id=42960270', 'info_dict': { @@ -223,7 +222,7 @@ class SRGSSRPlayIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01', 'only_matching': True, @@ -244,4 +243,4 @@ class SRGSSRPlayIE(InfoExtractor): bu = mobj.group('bu') media_type = mobj.group('type') or mobj.group('type_2') media_id = mobj.group('id') - return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR') + return self.url_result(f'srgssr:{bu[:3]}:{media_type}:{media_id}', 'SRGSSR') diff --git a/yt_dlp/extractor/srmediathek.py b/yt_dlp/extractor/srmediathek.py index f0b3b58..fc63d9b 100644 --- 
a/yt_dlp/extractor/srmediathek.py +++ b/yt_dlp/extractor/srmediathek.py @@ -43,7 +43,7 @@ class SRMediathekIE(ARDMediathekBaseIE): webpage = self._download_webpage(url, video_id) if '>Der gewünschte Beitrag ist leider nicht mehr verfügbar.<' in webpage: - raise ExtractorError('Video %s is no longer available' % video_id, expected=True) + raise ExtractorError(f'Video {video_id} is no longer available', expected=True) media_collection_url = self._search_regex( r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url') diff --git a/yt_dlp/extractor/stageplus.py b/yt_dlp/extractor/stageplus.py index 77e4362..6399072 100644 --- a/yt_dlp/extractor/stageplus.py +++ b/yt_dlp/extractor/stageplus.py @@ -468,7 +468,7 @@ fragment BannerFields on Banner { }, data=json.dumps({ 'query': self._GRAPHQL_QUERY, 'variables': {'videoId': concert_id}, - 'operationName': 'videoDetailPage' + 'operationName': 'videoDetailPage', }, separators=(',', ':')).encode())['data']['node'] metadata = traverse_obj(data, { diff --git a/yt_dlp/extractor/stanfordoc.py b/yt_dlp/extractor/stanfordoc.py index be0f4af..ab41091 100644 --- a/yt_dlp/extractor/stanfordoc.py +++ b/yt_dlp/extractor/stanfordoc.py @@ -19,7 +19,7 @@ class StanfordOpenClassroomIE(InfoExtractor): 'id': 'PracticalUnix_intro-environment', 'ext': 'mp4', 'title': 'Intro Environment', - } + }, } def _real_extract(self, url): @@ -34,12 +34,12 @@ class StanfordOpenClassroomIE(InfoExtractor): 'upload_date': None, } - baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/' - xmlUrl = baseUrl + video + '.xml' - mdoc = self._download_xml(xmlUrl, info['id']) + base_url = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/' + xml_url = base_url + video + '.xml' + mdoc = self._download_xml(xml_url, info['id']) try: info['title'] = mdoc.findall('./title')[0].text - info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text + info['url'] = base_url + 
mdoc.findall('./videoFile')[0].text except IndexError: raise ExtractorError('Invalid metadata XML file') return info @@ -66,7 +66,7 @@ class StanfordOpenClassroomIE(InfoExtractor): links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage)) info['entries'] = [self.url_result( - 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) + f'http://openclassroom.stanford.edu/MainFolder/{unescapeHTML(l)}', ) for l in links] return info else: # Root page @@ -78,12 +78,12 @@ class StanfordOpenClassroomIE(InfoExtractor): } info['title'] = info['id'] - rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php' - rootpage = self._download_webpage(rootURL, info['id'], + root_url = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php' + rootpage = self._download_webpage(root_url, info['id'], errnote='Unable to download course info page') links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage)) info['entries'] = [self.url_result( - 'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l) + f'http://openclassroom.stanford.edu/MainFolder/{unescapeHTML(l)}', ) for l in links] return info diff --git a/yt_dlp/extractor/startrek.py b/yt_dlp/extractor/startrek.py index 94efb58..c591871 100644 --- a/yt_dlp/extractor/startrek.py +++ b/yt_dlp/extractor/startrek.py @@ -22,7 +22,7 @@ class StarTrekIE(InfoExtractor): }, { 'url': 'https://media.startrek.com/2022/06/16/2043801155561/1069981_hls/trr_snw_107_v4-c4bfc25d/stream_vtt.m3u8', }]}, - } + }, }, { 'url': 'https://www.startrek.com/videos/watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room', 'md5': 'f5ad74fbb86e91e0882fc0a333178d1d', @@ -38,7 +38,7 @@ class StarTrekIE(InfoExtractor): 'subtitles': {'en-US': [{ 'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_105_v5\.vtt', }]}, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py index 
312a4fd..8cb5765 100644 --- a/yt_dlp/extractor/startv.py +++ b/yt_dlp/extractor/startv.py @@ -1,7 +1,4 @@ from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( ExtractorError, clean_html, @@ -11,14 +8,14 @@ from ..utils import ( class StarTVIE(InfoExtractor): - _VALID_URL = r"""(?x) + _VALID_URL = r'''(?x) https?://(?:www\.)?startv\.com\.tr/ (?: (?:dizi|program)/(?:[^/?#&]+)/(?:bolumler|fragmanlar|ekstralar)| video/arsiv/(?:dizi|program)/(?:[^/?#&]+) )/ (?P<id>[^/?#&]+) - """ + ''' IE_NAME = 'startv' _TESTS = [ { @@ -32,41 +29,41 @@ class StarTVIE(InfoExtractor): 'description': 'md5:3a8049f05a75c2e8747116a673275de4', 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$', 'timestamp': 1569281400, - 'upload_date': '20190923' + 'upload_date': '20190923', }, }, { 'url': 'https://www.startv.com.tr/video/arsiv/dizi/avlu/44-bolum', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.startv.com.tr/dizi/cocuk/fragmanlar/5-bolum-fragmani', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.startv.com.tr/dizi/cocuk/ekstralar/5-bolumun-nefes-kesen-final-sahnesi', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.startv.com.tr/program/burcu-ile-haftasonu/bolumler/1-bolum', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.startv.com.tr/program/burcu-ile-haftasonu/fragmanlar/2-fragman', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.startv.com.tr/video/arsiv/program/buyukrisk/14-bolumde-hangi-unlu-ne-sordu-', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.startv.com.tr/video/arsiv/program/buyukrisk/buyuk-risk-334-bolum', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.startv.com.tr/video/arsiv/program/dada/dada-58-bolum', - 'only_matching': True - } + 'only_matching': True, + }, ] def _real_extract(self, url): @@ -80,7 +77,7 @@ class StarTVIE(InfoExtractor): if not info: 
raise ExtractorError('Failed to extract API data') - video_id = compat_str(info.get('id')) + video_id = str(info.get('id')) title = info.get('title') or self._og_search_title(webpage) description = clean_html(info.get('description')) or self._og_search_description(webpage, default=None) thumbnail = self._proto_relative_url( @@ -96,5 +93,5 @@ class StarTVIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'timestamp': int_or_none(info.get('release_date')), - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py index 63da966..b7f8ac3 100644 --- a/yt_dlp/extractor/steam.py +++ b/yt_dlp/extractor/steam.py @@ -10,7 +10,7 @@ from ..utils import ( class SteamIE(InfoExtractor): - _VALID_URL = r"""(?x) + _VALID_URL = r'''(?x) https?://(?:store\.steampowered|steamcommunity)\.com/ (?:agecheck/)? (?P<urltype>video|app)/ #If the page is only for videos or for a game @@ -18,7 +18,7 @@ class SteamIE(InfoExtractor): (?P<videoID>\d*)(?P<extra>\??) 
# For urltype == video we sometimes get the videoID | https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+) - """ + ''' _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' _TESTS = [{ @@ -31,7 +31,7 @@ class SteamIE(InfoExtractor): 'ext': 'mp4', 'title': 'Terraria video 256785003', 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com', - } + }, }, { 'md5': '6a294ee0c4b1f47f5bb76a65e31e3592', @@ -40,8 +40,8 @@ class SteamIE(InfoExtractor): 'ext': 'mp4', 'title': 'Terraria video 2040428', 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com', - } - } + }, + }, ], 'info_dict': { 'id': '105600', @@ -49,7 +49,7 @@ class SteamIE(InfoExtractor): }, 'params': { 'playlistend': 2, - } + }, }, { 'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/', 'info_dict': { @@ -61,13 +61,13 @@ class SteamIE(InfoExtractor): def _real_extract(self, url): m = self._match_valid_url(url) - fileID = m.group('fileID') - if fileID: + file_id = m.group('fileID') + if file_id: video_url = url - playlist_id = fileID + playlist_id = file_id else: - gameID = m.group('gameID') - playlist_id = gameID + game_id = m.group('gameID') + playlist_id = game_id video_url = self._VIDEO_PAGE_TEMPLATE % playlist_id self._set_cookie('steampowered.com', 'wants_mature_content', '1') @@ -99,7 +99,7 @@ class SteamIE(InfoExtractor): entry['thumbnail'] = movie.get('data-poster') for quality in ('', '-hd'): for ext in ('webm', 'mp4'): - video_url = movie.get('data-%s%s-source' % (ext, quality)) + video_url = movie.get(f'data-{ext}{quality}-source') if video_url: formats.append({ 'format_id': ext + quality, diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py index 46a15e6..09ebabb 100644 --- a/yt_dlp/extractor/stitcher.py +++ b/yt_dlp/extractor/stitcher.py @@ -1,5 +1,4 @@ from .common 
import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, clean_html, @@ -37,7 +36,7 @@ class StitcherBaseIE(InfoExtractor): def _extract_episode(self, episode, audio_url, show_info): info = { - 'id': compat_str(episode['id']), + 'id': str(episode['id']), 'display_id': episode.get('slug'), 'title': episode['title'].strip(), 'description': self._extract_description(episode), @@ -126,7 +125,7 @@ class StitcherShowIE(StitcherBaseIE): def _real_extract(self, url): show_slug = self._match_id(url) data = self._call_api( - 'search/show/%s/allEpisodes' % show_slug, show_slug, {'count': 10000}) + f'search/show/{show_slug}/allEpisodes', show_slug, {'count': 10000}) show = try_get(data, lambda x: x['shows'][0], dict) or {} show_info = self._extract_show_info(show) diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py index 20a70a7..d5430e6 100644 --- a/yt_dlp/extractor/storyfire.py +++ b/yt_dlp/extractor/storyfire.py @@ -14,8 +14,8 @@ class StoryFireBaseIE(InfoExtractor): def _call_api(self, path, video_id, resource, query=None): return self._download_json( - 'https://storyfire.com/app/%s/%s' % (path, video_id), video_id, - 'Downloading %s JSON metadata' % resource, query=query) + f'https://storyfire.com/app/{path}/{video_id}', video_id, + f'Downloading {resource} JSON metadata', query=query) def _parse_video(self, video): title = video['title'] @@ -69,7 +69,7 @@ class StoryFireIE(StoryFireBaseIE): 'params': { 'skip_download': True, }, - 'expected_warnings': ['Unable to download JSON metadata'] + 'expected_warnings': ['Unable to download JSON metadata'], } def _real_extract(self, url): @@ -92,7 +92,7 @@ class StoryFireUserIE(StoryFireBaseIE): def _fetch_page(self, user_id, page): videos = self._call_api( - 'publicVideos', user_id, 'page %d' % (page + 1), { + 'publicVideos', user_id, f'page {page + 1}', { 'skip': page * self._PAGE_SIZE, })['videos'] for video in videos: diff --git a/yt_dlp/extractor/streamable.py 
b/yt_dlp/extractor/streamable.py index c303ac5..62ae0ee 100644 --- a/yt_dlp/extractor/streamable.py +++ b/yt_dlp/extractor/streamable.py @@ -25,7 +25,7 @@ class StreamableIE(InfoExtractor): 'upload_date': '20160208', 'duration': 61.516, 'view_count': int, - } + }, }, # older video without bitrate, width/height, codecs, etc. info { @@ -40,7 +40,7 @@ class StreamableIE(InfoExtractor): 'upload_date': '20150311', 'duration': 12, 'view_count': int, - } + }, }, { 'url': 'https://streamable.com/e/dnd1', @@ -49,7 +49,7 @@ class StreamableIE(InfoExtractor): { 'url': 'https://streamable.com/s/okkqk/drxjds', 'only_matching': True, - } + }, ] def _real_extract(self, url): @@ -59,7 +59,7 @@ class StreamableIE(InfoExtractor): # to return video info like the title properly sometimes, and doesn't # include info like the video duration video = self._download_json( - 'https://ajax.streamable.com/videos/%s' % video_id, video_id) + f'https://ajax.streamable.com/videos/{video_id}', video_id) # Format IDs: # 0 The video is being uploaded @@ -99,5 +99,5 @@ class StreamableIE(InfoExtractor): 'timestamp': float_or_none(video.get('date_added')), 'duration': float_or_none(video.get('duration')), 'view_count': int_or_none(video.get('plays')), - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/streamcz.py b/yt_dlp/extractor/streamcz.py index c4537ba..4122ba8 100644 --- a/yt_dlp/extractor/streamcz.py +++ b/yt_dlp/extractor/streamcz.py @@ -23,7 +23,7 @@ class StreamCZIE(InfoExtractor): 'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165', 'duration': 1369.6, 'view_count': int, - } + }, }, { 'url': 'https://www.stream.cz/kdo-to-mluvi/kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna-64087937', 'md5': '41fd358000086a1ccdb068c77809b158', @@ -35,7 +35,7 @@ class StreamCZIE(InfoExtractor): 'description': 'md5:97a811000a6460266029d6c1c2ebcd59', 'duration': 50.2, 'view_count': int, - } + }, }, { 'url': 
'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267', 'md5': '3ee4d0be040e8f4a543e67e509d55e3f', @@ -47,7 +47,7 @@ class StreamCZIE(InfoExtractor): 'description': 'md5:4b8ada6718d34bb011c4e04ca4bc19bf', 'duration': 442.84, 'view_count': int, - } + }, }] def _extract_formats(self, spl_url, video): @@ -86,9 +86,9 @@ class StreamCZIE(InfoExtractor): perex duration views - }''' - }).encode('utf-8'), - headers={'Content-Type': 'application/json;charset=UTF-8'} + }''', + }).encode(), + headers={'Content-Type': 'application/json;charset=UTF-8'}, )['data']['episode'] spl_url = data['spl'] + 'spl2,3' @@ -105,7 +105,7 @@ class StreamCZIE(InfoExtractor): for ext, sub_url in subs.get('urls').items(): subtitles.setdefault(subs['language'], []).append({ 'ext': ext, - 'url': urljoin(spl_url, sub_url) + 'url': urljoin(spl_url, sub_url), }) formats = list(self._extract_formats(spl_url, video)) diff --git a/yt_dlp/extractor/streetvoice.py b/yt_dlp/extractor/streetvoice.py index a32c8bc..60056f7 100644 --- a/yt_dlp/extractor/streetvoice.py +++ b/yt_dlp/extractor/streetvoice.py @@ -33,7 +33,7 @@ class StreetVoiceIE(InfoExtractor): 'track': '流浪', 'track_id': '123688', 'album': '2010', - } + }, }, { 'url': 'http://tw.streetvoice.com/skippylu/songs/94440/', 'only_matching': True, @@ -41,7 +41,7 @@ class StreetVoiceIE(InfoExtractor): def _real_extract(self, url): song_id = self._match_id(url) - base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id + base_url = f'https://streetvoice.com/api/v4/song/{song_id}/' song = self._download_json(base_url, song_id, query={ 'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username', }) @@ -51,7 +51,7 @@ class StreetVoiceIE(InfoExtractor): for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]: f_url = (self._download_json( base_url + suffix + '/', song_id, 
- 'Downloading %s format URL' % format_id, + f'Downloading {format_id} format URL', data=b'', fatal=False) or {}).get('file') if not f_url: continue @@ -86,7 +86,7 @@ class StreetVoiceIE(InfoExtractor): 'timestamp': parse_iso8601(song.get('created_at')), 'uploader': try_get(user, lambda x: x['profile']['nickname']), 'uploader_id': str_or_none(user.get('id')), - 'uploader_url': urljoin(url, '/%s/' % username) if username else None, + 'uploader_url': urljoin(url, f'/{username}/') if username else None, 'view_count': get_count('plays'), 'like_count': get_count('likes'), 'comment_count': get_count('comments'), diff --git a/yt_dlp/extractor/stretchinternet.py b/yt_dlp/extractor/stretchinternet.py index e438dee..232837d 100644 --- a/yt_dlp/extractor/stretchinternet.py +++ b/yt_dlp/extractor/stretchinternet.py @@ -12,7 +12,7 @@ class StretchInternetIE(InfoExtractor): # 'timestamp': 1575668361, # 'upload_date': '20191206', 'uploader_id': '99997', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py index a847925..31c8afb 100644 --- a/yt_dlp/extractor/stripchat.py +++ b/yt_dlp/extractor/stripchat.py @@ -22,7 +22,7 @@ class StripchatIE(InfoExtractor): 'skip': 'Room is offline', }, { 'url': 'https://stripchat.com/Rakhijaan@xh', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py index 0ab7801..c489e42 100644 --- a/yt_dlp/extractor/stv.py +++ b/yt_dlp/extractor/stv.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( float_or_none, int_or_none, @@ -21,7 +20,7 @@ class STVPlayerIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20170301', 'title': '60 seconds on set with Laura Norton', - 'description': "How many questions can Laura - a.k.a Kerry Wyatt - answer in 60 seconds? 
Let\'s find out!", + 'description': "How many questions can Laura - a.k.a Kerry Wyatt - answer in 60 seconds? Let's find out!", 'timestamp': 1488388054, 'uploader_id': '1486976045', }, @@ -47,13 +46,13 @@ class STVPlayerIE(InfoExtractor): api_path, resp = None, {} for k, v in player_api_cache.items(): - if k.startswith('/episodes/') or k.startswith('/shortform/'): + if k.startswith(('/episodes/', '/shortform/')): api_path, resp = k, v break else: episode_id = str_or_none(try_get( props, lambda x: x['pageProps']['episodeId'])) - api_path = '/%s/%s' % (self._PTYPE_MAP[ptype], episode_id or video_id) + api_path = f'/{self._PTYPE_MAP[ptype]}/{episode_id or video_id}' result = resp.get('results') if not result: @@ -62,7 +61,7 @@ class STVPlayerIE(InfoExtractor): result = resp['results'] video = result['video'] - video_id = compat_str(video['id']) + video_id = str(video['id']) subtitles = {} _subtitles = result.get('_subtitles') or {} diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index 6ee3f75..30cb322 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -18,7 +18,7 @@ class SubstackIE(InfoExtractor): 'thumbnail': 'md5:bec758a34d8ee9142d43bcebdf33af18', 'uploader': 'Maybe Baby', 'uploader_id': '33628', - } + }, }, { 'url': 'https://haleynahman.substack.com/p/-dear-danny-i-found-my-boyfriends?s=r', 'md5': '0a63eacec877a1171a62cfa69710fcea', @@ -30,7 +30,7 @@ class SubstackIE(InfoExtractor): 'thumbnail': 'md5:daa40b6b79249417c14ff8103db29639', 'uploader': 'Maybe Baby', 'uploader_id': '33628', - } + }, }, { 'url': 'https://andrewzimmern.substack.com/p/mussels-with-black-bean-sauce-recipe', 'md5': 'fd3c07077b02444ff0130715b5f632bb', @@ -42,7 +42,7 @@ class SubstackIE(InfoExtractor): 'thumbnail': 'md5:e30bfaa9da40e82aa62354263a9dd232', 'uploader': "Andrew Zimmern's Spilled Milk ", 'uploader_id': '577659', - } + }, }] @classmethod @@ -54,7 +54,7 @@ class SubstackIE(InfoExtractor): if mobj: parsed = 
urllib.parse.urlparse(url) yield parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl() - raise cls.StopExtraction() + raise cls.StopExtraction def _extract_video_formats(self, video_id, url): formats, subtitles = [], {} diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py index 501156e..6078d50 100644 --- a/yt_dlp/extractor/sunporno.py +++ b/yt_dlp/extractor/sunporno.py @@ -22,7 +22,7 @@ class SunPornoIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 302, 'age_limit': 18, - } + }, }, { 'url': 'http://embeds.sunporno.com/embed/807778', 'only_matching': True, @@ -32,7 +32,7 @@ class SunPornoIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.sunporno.com/videos/%s' % video_id, video_id) + f'http://www.sunporno.com/videos/{video_id}', video_id) title = self._html_extract_title(webpage) description = self._html_search_meta( diff --git a/yt_dlp/extractor/sverigesradio.py b/yt_dlp/extractor/sverigesradio.py index 01a07b3..944ce18 100644 --- a/yt_dlp/extractor/sverigesradio.py +++ b/yt_dlp/extractor/sverigesradio.py @@ -51,7 +51,7 @@ class SverigesRadioBaseIE(InfoExtractor): query['quality'] = quality audio_url_data = self._download_json( self._BASE_URL + 'getaudiourl', audio_id, - 'Downloading %s format JSON metadata' % quality, + f'Downloading {quality} format JSON metadata', fatal=False, query=query) or {} audio_url = audio_url_data.get('audioUrl') if not audio_url or audio_url in urls: diff --git a/yt_dlp/extractor/svt.py b/yt_dlp/extractor/svt.py index 573147a..38782ab 100644 --- a/yt_dlp/extractor/svt.py +++ b/yt_dlp/extractor/svt.py @@ -2,7 +2,6 @@ import json import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_ext, dict_get, @@ -100,7 +99,7 @@ class SVTBaseIE(InfoExtractor): class SVTIE(SVTBaseIE): _VALID_URL = 
r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)' - _EMBED_REGEX = [r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % _VALID_URL] + _EMBED_REGEX = [rf'(?:<iframe src|href)="(?P<url>{_VALID_URL}[^"]*)"'] _TEST = { 'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false', 'md5': '33e9a5d8f646523ce0868ecfb0eed77d', @@ -119,7 +118,7 @@ class SVTIE(SVTBaseIE): article_id = mobj.group('id') info = self._download_json( - 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id), + f'http://www.svt.se/wd?widgetId={widget_id}&articleId={article_id}&format=json&type=embed&output=json', article_id) info_dict = self._extract_video(info['video'], article_id) @@ -159,7 +158,7 @@ class SVTPlayIE(SVTPlayBaseIE): 'subtitles': { 'sv': [{ 'ext': 'vtt', - }] + }], }, }, 'params': { @@ -181,7 +180,7 @@ class SVTPlayIE(SVTPlayBaseIE): 'episode': '1. Farlig kryssning', 'series': 'Rederiet', 'subtitles': { - 'sv': 'count:3' + 'sv': 'count:3', }, }, 'params': { @@ -236,7 +235,7 @@ class SVTPlayIE(SVTPlayBaseIE): def _extract_by_video_id(self, video_id, webpage=None): data = self._download_json( - 'https://api.svt.se/videoplayer-api/video/%s' % video_id, + f'https://api.svt.se/videoplayer-api/video/{video_id}', video_id, headers=self.geo_verification_headers()) info_dict = self._extract_video(data, video_id) if not info_dict.get('title'): @@ -281,7 +280,7 @@ class SVTPlayIE(SVTPlayBaseIE): svt_id = try_get( data, lambda x: x['statistics']['dataLake']['content']['id'], - compat_str) + str) if not svt_id: nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False) @@ -323,7 +322,7 @@ class SVTSeriesIE(SVTPlayBaseIE): @classmethod def suitable(cls, url): - return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url) + return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) 
else super().suitable(url) def _real_extract(self, url): series_slug, season_id = self._match_valid_url(url).groups() @@ -349,7 +348,7 @@ class SVTSeriesIE(SVTPlayBaseIE): name shortDescription } -}''' % series_slug, +}''' % series_slug, # noqa: UP031 })['data']['listablesBySlug'][0] season_name = None @@ -368,7 +367,7 @@ class SVTSeriesIE(SVTPlayBaseIE): for item in items: video = item.get('item') or {} content_id = video.get('videoSvtId') - if not content_id or not isinstance(content_id, compat_str): + if not content_id or not isinstance(content_id, str): continue entries.append(self.url_result( 'svt:' + content_id, SVTPlayIE.ie_key(), content_id)) @@ -377,7 +376,7 @@ class SVTSeriesIE(SVTPlayBaseIE): season_name = season_name or season_id if title and season_name: - title = '%s - %s' % (title, season_name) + title = f'{title} - {season_name}' elif season_id: title = season_id @@ -401,32 +400,32 @@ class SVTPageIE(SVTBaseIE): 'id': 'jXvk42E', 'title': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare', 'ext': 'mp4', - "duration": 80, + 'duration': 80, 'age_limit': 0, 'timestamp': 1704370009, 'episode': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare', 'series': 'Lokala Nyheter Skåne', - 'upload_date': '20240104' + 'upload_date': '20240104', }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'https://www.svt.se/nyheter/svtforum/2023-tungt-ar-for-svensk-media', 'info_dict': { 'title': '2023 tungt år för svensk media', 'id': 'ewqAZv4', 'ext': 'mp4', - "duration": 3074, + 'duration': 3074, 'age_limit': 0, 'series': '', 'timestamp': 1702980479, 'upload_date': '20231219', - 'episode': 'Mediestudier' + 'episode': 'Mediestudier', }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa', 'info_dict': { @@ -434,7 +433,7 @@ class SVTPageIE(SVTBaseIE): 'title': 'Bakom masken – Lehners kamp mot mental ohälsa', }, 'playlist_count': 4, - 'skip': 
'Video is gone' + 'skip': 'Video is gone', }, { 'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien', 'info_dict': { @@ -442,7 +441,7 @@ class SVTPageIE(SVTBaseIE): 'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien', }, 'playlist_count': 2, - 'skip': 'Video is gone' + 'skip': 'Video is gone', }, { # only programTitle 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun', @@ -453,7 +452,7 @@ class SVTPageIE(SVTBaseIE): 'duration': 27, 'age_limit': 0, }, - 'skip': 'Video is gone' + 'skip': 'Video is gone', }, { 'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1', 'only_matching': True, @@ -464,7 +463,7 @@ class SVTPageIE(SVTBaseIE): @classmethod def suitable(cls, url): - return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTPageIE, cls).suitable(url) + return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super().suitable(url) def _real_extract(self, url): display_id = self._match_id(url) @@ -479,7 +478,7 @@ class SVTPageIE(SVTBaseIE): def entries(): for video_id in set(traverse_obj(data, ( - 'page', (('topMedia', 'svtId'), ('body', ..., 'video', 'svtId')), {str} + 'page', (('topMedia', 'svtId'), ('body', ..., 'video', 'svtId')), {str}, ))): info = self._extract_video( self._download_json(f'https://api.svt.se/video/{video_id}', video_id), video_id) diff --git a/yt_dlp/extractor/swearnet.py b/yt_dlp/extractor/swearnet.py index aeaff28..b4835c5 100644 --- a/yt_dlp/extractor/swearnet.py +++ b/yt_dlp/extractor/swearnet.py @@ -17,7 +17,7 @@ class SwearnetEpisodeIE(InfoExtractor): 'title': 'Episode 1 - Grilled Cheese Sammich', 'season_number': 1, 'thumbnail': 'https://cdn.vidyard.com/thumbnails/232819/_RX04IKIq60a2V6rIRqq_Q_small.jpg', - } + }, }] def _get_formats_and_subtitle(self, video_source, video_id): @@ -32,7 +32,7 @@ class SwearnetEpisodeIE(InfoExtractor): else: formats.extend({ 'url': video_mp4.get('url'), - 'ext': 
'mp4' + 'ext': 'mp4', } for video_mp4 in value) return formats, subtitles @@ -42,7 +42,7 @@ class SwearnetEpisodeIE(InfoExtractor): for caption in caption_json: subs.setdefault(caption.get('language') or 'und', []).append({ 'url': caption.get('vttUrl'), - 'name': caption.get('name') + 'name': caption.get('name'), }) return subs @@ -75,5 +75,5 @@ class SwearnetEpisodeIE(InfoExtractor): 'season_number': int_or_none(season_number), 'episode_number': int_or_none(episode_number), 'thumbnails': [{'url': thumbnail_url} - for thumbnail_url in traverse_obj(json_data, ('thumbnailUrls', ...))] + for thumbnail_url in traverse_obj(json_data, ('thumbnailUrls', ...))], } diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py index 29e5e57..a32b500 100644 --- a/yt_dlp/extractor/syfy.py +++ b/yt_dlp/extractor/syfy.py @@ -30,9 +30,9 @@ class SyfyIE(AdobePassIE): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - syfy_mpx = list(self._parse_json(self._search_regex( + syfy_mpx = next(iter(self._parse_json(self._search_regex( r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), - display_id)['syfy']['syfy_mpx'].values())[0] + display_id)['syfy']['syfy_mpx'].values())) video_id = syfy_mpx['mpxGUID'] title = syfy_mpx['episodeTitle'] query = { diff --git a/yt_dlp/extractor/syvdk.py b/yt_dlp/extractor/syvdk.py index 287fb26..ec16683 100644 --- a/yt_dlp/extractor/syvdk.py +++ b/yt_dlp/extractor/syvdk.py @@ -13,8 +13,8 @@ class SYVDKIE(InfoExtractor): 'display_id': 'isabella-arendt-stiller-op-for-de-konservative-2', 'ext': 'mp3', 'title': 'Isabella Arendt stiller op for De Konservative', - 'description': 'md5:f5fa6a431813bf37284f3412ad7c6c06' - } + 'description': 'md5:f5fa6a431813bf37284f3412ad7c6c06', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/tagesschau.py b/yt_dlp/extractor/tagesschau.py index c69c13d..4c537df 100644 --- a/yt_dlp/extractor/tagesschau.py +++ 
b/yt_dlp/extractor/tagesschau.py @@ -141,10 +141,10 @@ class TagesschauIE(InfoExtractor): if not formats: continue entries.append({ - 'id': '%s-%d' % (display_id, num), + 'id': f'{display_id}-{num}', 'title': try_get(video, lambda x: x['mc']['_title']), 'duration': int_or_none(try_get(video, lambda x: x['mc']['_duration'])), - 'formats': formats + 'formats': formats, }) if not entries: diff --git a/yt_dlp/extractor/taptap.py b/yt_dlp/extractor/taptap.py index 56f2f0e..e4c31da 100644 --- a/yt_dlp/extractor/taptap.py +++ b/yt_dlp/extractor/taptap.py @@ -31,9 +31,9 @@ class TapTapBaseIE(InfoExtractor): # h265 playlist contains both h265 and h264 formats video_url = traverse_obj(video_data, ('play_url', ('url_h265', 'url'), {url_or_none}, any)) formats = self._extract_m3u8_formats(video_url, video_id, fatal=False) - for format in formats: - if re.search(r'^(hev|hvc|hvt)\d', format.get('vcodec', '')): - format['format_id'] = join_nonempty(format.get('format_id'), 'h265', delim='_') + for fmt in formats: + if re.search(r'^(hev|hvc|hvt)\d', fmt.get('vcodec', '')): + fmt['format_id'] = join_nonempty(fmt.get('format_id'), 'h265', delim='_') return { 'id': str(video_id), @@ -41,7 +41,7 @@ class TapTapBaseIE(InfoExtractor): **traverse_obj(video_data, ({ 'duration': ('info', 'duration', {int_or_none}), 'thumbnail': ('thumbnail', ('original_url', 'url'), {url_or_none}), - }), get_all=False) + }), get_all=False), } def _real_extract(self, url): @@ -54,8 +54,8 @@ class TapTapBaseIE(InfoExtractor): metainfo = traverse_obj(data, self._META_PATH) entries = [{ **metainfo, - **self._extract_video(id) - } for id in set(traverse_obj(data, self._ID_PATH))] + **self._extract_video(id_), + } for id_ in set(traverse_obj(data, self._ID_PATH))] return self.playlist_result(entries, **metainfo, id=video_id) @@ -100,7 +100,7 @@ class TapTapMomentIE(TapTapBaseIE): 'uploader': '乌酱', 'uploader_id': '532896', 'thumbnail': r're:^https?://.*\.(png|jpg)', - } + }, }], 'params': {'skip_download': 
'm3u8'}, }, { @@ -131,7 +131,7 @@ class TapTapMomentIE(TapTapBaseIE): 'uploader': '崩坏:星穹铁道', 'uploader_id': '414732580', 'thumbnail': r're:^https?://.*\.(png|jpg)', - } + }, }], 'params': {'skip_download': 'm3u8'}, }, { @@ -176,7 +176,7 @@ class TapTapAppIE(TapTapBaseIE): 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', 'duration': 26, 'thumbnail': r're:^https?://.*\.(png|jpg)', - } + }, }, { 'info_dict': { 'id': '4058462', @@ -185,7 +185,7 @@ class TapTapAppIE(TapTapBaseIE): 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', 'duration': 295, 'thumbnail': r're:^https?://.*\.(png|jpg)', - } + }, }], 'params': {'skip_download': 'm3u8'}, }] @@ -221,7 +221,7 @@ class TapTapAppIntlIE(TapTapIntlBase): 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182', 'duration': 78, 'thumbnail': r're:^https?://.*\.(png|jpg)', - } + }, }], 'params': {'skip_download': 'm3u8'}, }] @@ -269,7 +269,7 @@ class TapTapPostIntlIE(TapTapIntlBase): 'uploader': 'TapTap Editor', 'uploader_id': '80224473', 'thumbnail': r're:^https?://.*\.(png|jpg)', - } + }, }], 'params': {'skip_download': 'm3u8'}, }] diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index 4e17859..9b9aa50 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .turner import TurnerBaseIE -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( float_or_none, int_or_none, @@ -27,7 +24,7 @@ class TBSIE(TurnerBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew', 'only_matching': True, @@ -42,20 +39,20 @@ class TBSIE(TurnerBaseIE): drupal_settings = self._parse_json(self._search_regex( r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>', webpage, 'drupal setting'), display_id) - isLive = 'watchtnt' in path or 'watchtbs' in path - 
video_data = next(v for v in drupal_settings['turner_playlist'] if isLive or v.get('url') == path) + is_live = 'watchtnt' in path or 'watchtbs' in path + video_data = next(v for v in drupal_settings['turner_playlist'] if is_live or v.get('url') == path) media_id = video_data['mediaID'] title = video_data['title'] - tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse( + tokenizer_query = urllib.parse.parse_qs(urllib.parse.urlparse( drupal_settings['ngtv_token_url']).query) info = self._extract_ngtv_info( media_id, tokenizer_query, { 'url': url, 'site_name': site[:3].upper(), - 'auth_required': video_data.get('authRequired') == '1' or isLive, - 'is_live': isLive + 'auth_required': video_data.get('authRequired') == '1' or is_live, + 'is_live': is_live, }) thumbnails = [] @@ -84,6 +81,6 @@ class TBSIE(TurnerBaseIE): 'season_number': int_or_none(video_data.get('season')), 'episode_number': int_or_none(video_data.get('episode')), 'thumbnails': thumbnails, - 'is_live': isLive + 'is_live': is_live, }) return info diff --git a/yt_dlp/extractor/tbsjp.py b/yt_dlp/extractor/tbsjp.py index 77ddeca..32f9cfb 100644 --- a/yt_dlp/extractor/tbsjp.py +++ b/yt_dlp/extractor/tbsjp.py @@ -92,8 +92,8 @@ class TBSJPProgramIE(InfoExtractor): 'categories': ['エンタメ', 'ミライカプセル', '会社', '働く', 'バラエティ', '動画'], 'description': '幼少期の夢は大人になって、どう成長したのだろうか?\nそしてその夢は今後、どのように広がっていくのか?\nいま話題の会社で働く人の「夢の成長」を描く', 'series': 'ミライカプセル -I have a dream-', - 'title': 'ミライカプセル -I have a dream-' - } + 'title': 'ミライカプセル -I have a dream-', + }, }] def _real_extract(self, url): @@ -126,7 +126,7 @@ class TBSJPPlaylistIE(InfoExtractor): 'info_dict': { 'title': 'まもなく配信終了', 'id': '184f9970e7ba48e4915f1b252c55015e', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 778fa12..0d39be6 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -29,7 +29,7 @@ class TeachableBaseIE(InfoExtractor): 'courses.workitdaily.com': 
'workitdaily', } - _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys())) + _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES)) def _real_initialize(self): self._logged_in = False @@ -43,8 +43,8 @@ class TeachableBaseIE(InfoExtractor): return login_page, urlh = self._download_webpage_handle( - 'https://%s/sign_in' % site, None, - 'Downloading %s login page' % site) + f'https://{site}/sign_in', None, + f'Downloading {site} login page') def is_logged(webpage): return any(re.search(p, webpage) for p in ( @@ -73,7 +73,7 @@ class TeachableBaseIE(InfoExtractor): post_url = urljoin(login_url, post_url) response = self._download_webpage( - post_url, None, 'Logging in to %s' % site, + post_url, None, f'Logging in to {site}', data=urlencode_postdata(login_form), headers={ 'Content-Type': 'application/x-www-form-urlencoded', @@ -82,8 +82,8 @@ class TeachableBaseIE(InfoExtractor): if '>I accept the new Privacy Policy<' in response: raise ExtractorError( - 'Unable to login: %s asks you to accept new Privacy Policy. ' - 'Go to https://%s/ and accept.' % (site, site), expected=True) + f'Unable to login: {site} asks you to accept new Privacy Policy. 
' + f'Go to https://{site}/ and accept.', expected=True) # Successful login if is_logged(response): @@ -93,7 +93,7 @@ class TeachableBaseIE(InfoExtractor): message = get_element_by_class('alert', response) if message is not None: raise ExtractorError( - 'Unable to login: %s' % clean_html(message), expected=True) + f'Unable to login: {clean_html(message)}', expected=True) raise ExtractorError('Unable to log in') @@ -102,11 +102,11 @@ class TeachableIE(TeachableBaseIE): _WORKING = False _VALID_URL = r'''(?x) (?: - %shttps?://(?P<site_t>[^/]+)| - https?://(?:www\.)?(?P<site>%s) + {}https?://(?P<site_t>[^/]+)| + https?://(?:www\.)?(?P<site>{}) ) /courses/[^/]+/lectures/(?P<id>\d+) - ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE + '''.format(*TeachableBaseIE._VALID_URL_SUB_TUPLE) _TESTS = [{ 'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364', @@ -146,7 +146,7 @@ class TeachableIE(TeachableBaseIE): if cls._is_teachable(webpage): if re.match(r'https?://[^/]+/(?:courses|p)', url): yield f'{cls._URL_PREFIX}{url}' - raise cls.StopExtraction() + raise cls.StopExtraction def _real_extract(self, url): mobj = self._match_valid_url(url) @@ -178,7 +178,7 @@ class TeachableIE(TeachableBaseIE): chapter = None chapter_number = None section_item = self._search_regex( - r'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']%s[^>]+>.+?</li>)' % video_id, + rf'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']{video_id}[^>]+>.+?</li>)', webpage, 'section item', default=None, group='li') if section_item: chapter_number = int_or_none(self._search_regex( @@ -211,11 +211,11 @@ class TeachableIE(TeachableBaseIE): class TeachableCourseIE(TeachableBaseIE): _VALID_URL = r'''(?x) (?: - %shttps?://(?P<site_t>[^/]+)| - https?://(?:www\.)?(?P<site>%s) + {}https?://(?P<site_t>[^/]+)| + https?://(?:www\.)?(?P<site>{}) ) /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+) - ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE + '''.format(*TeachableBaseIE._VALID_URL_SUB_TUPLE) _TESTS = [{ 'url': 
'http://v1.upskillcourses.com/courses/essential-web-developer-course/', 'info_dict': { @@ -242,8 +242,7 @@ class TeachableCourseIE(TeachableBaseIE): @classmethod def suitable(cls, url): - return False if TeachableIE.suitable(url) else super( - TeachableCourseIE, cls).suitable(url) + return False if TeachableIE.suitable(url) else super().suitable(url) def _real_extract(self, url): mobj = self._match_valid_url(url) @@ -259,7 +258,7 @@ class TeachableCourseIE(TeachableBaseIE): webpage = self._download_webpage(url, course_id) - url_base = 'https://%s/' % site + url_base = f'https://{site}/' entries = [] diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py index 7402409..4c26d31 100644 --- a/yt_dlp/extractor/teachertube.py +++ b/yt_dlp/extractor/teachertube.py @@ -50,7 +50,7 @@ class TeacherTubeIE(InfoExtractor): r'<div\b[^>]+\bclass=["\']msgBox error[^>]+>([^<]+)', webpage, 'error', default=None) if error: - raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {error}', expected=True) title = self._html_search_meta('title', webpage, 'title', fatal=True) TITLE_SUFFIX = ' - TeacherTube' @@ -70,7 +70,7 @@ class TeacherTubeIE(InfoExtractor): formats = [ { 'url': media_url, - 'quality': quality(determine_ext(media_url)) + 'quality': quality(determine_ext(media_url)), } for media_url in set(media_urls) ] @@ -102,7 +102,7 @@ class TeacherTubeUserIE(InfoExtractor): _TEST = { 'url': 'http://www.teachertube.com/user/profile/rbhagwati2', 'info_dict': { - 'id': 'rbhagwati2' + 'id': 'rbhagwati2', }, 'playlist_mincount': 179, } @@ -115,10 +115,10 @@ class TeacherTubeUserIE(InfoExtractor): webpage = self._download_webpage(url, user_id) urls.extend(re.findall(self._MEDIA_RE, webpage)) - pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1] + pages = re.findall(rf'/ajax-user/user-videos/{user_id}\?page=([0-9]+)', webpage)[:-1] for p in pages: - more = 
'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) - webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages))) + more = f'http://www.teachertube.com/ajax-user/user-videos/{user_id}?page={p}' + webpage = self._download_webpage(more, user_id, f'Downloading page {p}/{len(pages)}') video_urls = re.findall(self._MEDIA_RE, webpage) urls.extend(video_urls) diff --git a/yt_dlp/extractor/ted.py b/yt_dlp/extractor/ted.py index 0969bbb..8544c8b 100644 --- a/yt_dlp/extractor/ted.py +++ b/yt_dlp/extractor/ted.py @@ -46,11 +46,11 @@ class TedTalkIE(TedBaseIE): webpage = self._download_webpage(url, display_id) talk_info = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['videoData'] video_id = talk_info['id'] - playerData = self._parse_json(talk_info.get('playerData'), video_id) + player_data = self._parse_json(talk_info.get('playerData'), video_id) http_url = None formats, subtitles = [], {} - for format_id, resources in (playerData.get('resources') or {}).items(): + for format_id, resources in (player_data.get('resources') or {}).items(): if format_id == 'hls': stream_url = url_or_none(try_get(resources, lambda x: x['stream'])) if not stream_url: @@ -71,7 +71,7 @@ class TedTalkIE(TedBaseIE): bitrate = int_or_none(resource.get('bitrate')) formats.append({ 'url': h264_url, - 'format_id': '%s-%sk' % (format_id, bitrate), + 'format_id': f'{format_id}-{bitrate}k', 'tbr': bitrate, }) if re.search(r'\d+k', h264_url): @@ -81,7 +81,7 @@ class TedTalkIE(TedBaseIE): if not streamer: continue formats.extend({ - 'format_id': '%s-%s' % (format_id, resource.get('name')), + 'format_id': '{}-{}'.format(format_id, resource.get('name')), 'url': streamer, 'play_path': resource['file'], 'ext': 'flv', @@ -98,7 +98,7 @@ class TedTalkIE(TedBaseIE): continue bitrate_url = re.sub(r'\d+k', bitrate, http_url) if not self._is_valid_url( - bitrate_url, video_id, '%s bitrate' % bitrate): + bitrate_url, video_id, f'{bitrate} 
bitrate'): continue f = m3u8_format.copy() f.update({ @@ -119,12 +119,12 @@ class TedTalkIE(TedBaseIE): }) if not formats: - external = playerData.get('external') or {} + external = player_data.get('external') or {} service = external.get('service') or '' ext_url = external.get('code') if service.lower() == 'youtube' else None return self.url_result(ext_url or external['uri']) - thumbnail = playerData.get('thumb') or self._og_search_property('image', webpage) + thumbnail = player_data.get('thumb') or self._og_search_property('image', webpage) if thumbnail: # trim thumbnail resize parameters thumbnail = thumbnail.split('?')[0] @@ -141,7 +141,7 @@ class TedTalkIE(TedBaseIE): 'view_count': str_to_int(talk_info.get('viewedCount')), 'upload_date': unified_strdate(talk_info.get('publishedAt')), 'release_date': unified_strdate(talk_info.get('recordedOn')), - 'tags': try_get(playerData, lambda x: x['targeting']['tag'].split(',')), + 'tags': try_get(player_data, lambda x: x['targeting']['tag'].split(',')), } @@ -153,7 +153,7 @@ class TedSeriesIE(TedBaseIE): 'id': '3', 'title': 'Small Thing Big Idea', 'series': 'Small Thing Big Idea', - 'description': 'md5:6869ca52cec661aef72b3e9f7441c55c' + 'description': 'md5:6869ca52cec661aef72b3e9f7441c55c', }, 'playlist_mincount': 16, }, { @@ -163,7 +163,7 @@ class TedSeriesIE(TedBaseIE): 'title': 'The Way We Work Season 2', 'series': 'The Way We Work', 'description': 'md5:59469256e533e1a48c4aa926a382234c', - 'season_number': 2 + 'season_number': 2, }, 'playlist_mincount': 8, }] @@ -194,7 +194,7 @@ class TedPlaylistIE(TedBaseIE): 'info_dict': { 'id': '171', 'title': 'The most popular talks of all time', - 'description': 'md5:d2f22831dc86c7040e733a3cb3993d78' + 'description': 'md5:d2f22831dc86c7040e733a3cb3993d78', }, 'playlist_mincount': 25, }] diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py index 1705c2d..c5ca208 100644 --- a/yt_dlp/extractor/tele13.py +++ b/yt_dlp/extractor/tele13.py @@ -36,7 +36,7 @@ class 
Tele13IE(InfoExtractor): 'uploader_id': 'UCnLY_3ezwNcDSC_Wc6suZxw', }, 'add_ie': ['Youtube'], - } + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/telecaribe.py b/yt_dlp/extractor/telecaribe.py index 91118a1..5391876 100644 --- a/yt_dlp/extractor/telecaribe.py +++ b/yt_dlp/extractor/telecaribe.py @@ -37,7 +37,7 @@ class TelecaribePlayIE(InfoExtractor): }, 'params': { 'skip_download': 'Livestream', - } + }, }, { 'url': 'https://www.play.telecaribe.co/liveplus', 'info_dict': { diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index a3f0c7c..7a9dcd7 100644 --- a/yt_dlp/extractor/telecinco.py +++ b/yt_dlp/extractor/telecinco.py @@ -29,7 +29,7 @@ class TelecincoIE(InfoExtractor): 'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido', 'duration': 662, }, - }] + }], }, { 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', 'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a', diff --git a/yt_dlp/extractor/telegraaf.py b/yt_dlp/extractor/telegraaf.py index 13e9515..c0bcdc2 100644 --- a/yt_dlp/extractor/telegraaf.py +++ b/yt_dlp/extractor/telegraaf.py @@ -40,11 +40,11 @@ class TelegraafIE(InfoExtractor): videoId } } -}''' % article_id, +}''' % article_id, # noqa: UP031 })['data']['article']['videos'][0]['videoId'] item = self._download_json( - 'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id, + f'https://content.tmgvideo.nl/playlist/item={video_id}/playlist.json', video_id)['items'][0] title = item['title'] @@ -62,7 +62,7 @@ class TelegraafIE(InfoExtractor): formats.extend(self._extract_mpd_formats( manifest_url, video_id, mpd_id='dash', fatal=False)) else: - self.report_warning('Unknown adaptive format %s' % ext) + self.report_warning(f'Unknown adaptive format {ext}') for location in locations.get('progressive', []): src = try_get(location, lambda x: x['sources'][0]['src']) if not src: @@ -72,7 +72,7 @@ class TelegraafIE(InfoExtractor): 
'url': src, 'width': int_or_none(location.get('width')), 'height': int_or_none(location.get('height')), - 'format_id': 'http' + ('-%s' % label if label else ''), + 'format_id': 'http' + (f'-{label}' if label else ''), }) return { diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py index 5ec5485..4ec8bcc 100644 --- a/yt_dlp/extractor/telegram.py +++ b/yt_dlp/extractor/telegram.py @@ -62,7 +62,7 @@ class TelegramEmbedIE(InfoExtractor): }, 'params': { 'noplaylist': True, - } + }, }, { # 2-video post with 'single' query param 'url': 'https://t.me/vorposte/29342?single', diff --git a/yt_dlp/extractor/telemb.py b/yt_dlp/extractor/telemb.py index a71b14c..6644648 100644 --- a/yt_dlp/extractor/telemb.py +++ b/yt_dlp/extractor/telemb.py @@ -18,7 +18,7 @@ class TeleMBIE(InfoExtractor): 'title': 'Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages', 'description': 'md5:bc5225f47b17c309761c856ad4776265', 'thumbnail': r're:^http://.*\.(?:jpg|png)$', - } + }, }, { # non-ASCII characters in download URL @@ -31,7 +31,7 @@ class TeleMBIE(InfoExtractor): 'title': 'Havré - Incendie mortel - Les reportages', 'description': 'md5:5e54cb449acb029c2b7734e2d946bd4a', 'thumbnail': r're:^http://.*\.(?:jpg|png)$', - } + }, }, ] @@ -46,7 +46,7 @@ class TeleMBIE(InfoExtractor): for video_url in re.findall(r'file\s*:\s*"([^"]+)"', webpage): fmt = { 'url': video_url, - 'format_id': video_url.split(':')[0] + 'format_id': video_url.split(':')[0], } rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url) if rtmp: diff --git a/yt_dlp/extractor/telemundo.py b/yt_dlp/extractor/telemundo.py index 84b24de..7f4cecd 100644 --- a/yt_dlp/extractor/telemundo.py +++ b/yt_dlp/extractor/telemundo.py @@ -19,7 +19,7 @@ class TelemundoIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { 'url': 
'https://www.telemundo.com/shows/al-rojo-vivo/empleo/video/personajes-de-times-square-piden-que-la-ciudad-de-nueva-york-los-deje-volver-trabajar-tmvo9816272', 'only_matching': True, @@ -46,5 +46,5 @@ class TelemundoIE(InfoExtractor): 'formats': formats, 'timestamp': date, 'uploader': 'Telemundo', - 'uploader_id': self._search_regex(r'https?:\/\/(?:[^/]+\/){3}video\/(?P<id>[^\/]+)', m3u8_url, 'Akamai account', fatal=False) + 'uploader_id': self._search_regex(r'https?:\/\/(?:[^/]+\/){3}video\/(?P<id>[^\/]+)', m3u8_url, 'Akamai account', fatal=False), } diff --git a/yt_dlp/extractor/telequebec.py b/yt_dlp/extractor/telequebec.py index 08a0837..7f5d5d2 100644 --- a/yt_dlp/extractor/telequebec.py +++ b/yt_dlp/extractor/telequebec.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, smuggle_url, @@ -72,7 +71,7 @@ class TeleQuebecIE(TeleQuebecBaseIE): product = media.get('product') or {} season = product.get('season') or {} info.update({ - 'description': try_get(media, lambda x: x['descriptions'][-1]['text'], compat_str), + 'description': try_get(media, lambda x: x['descriptions'][-1]['text'], str), 'series': try_get(season, lambda x: x['serie']['titre']), 'season': season.get('name'), 'season_number': int_or_none(season.get('seasonNo')), @@ -108,14 +107,14 @@ class TeleQuebecSquatIE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'https://squat.api.telequebec.tv/v1/videos/%s' % video_id, + f'https://squat.api.telequebec.tv/v1/videos/{video_id}', video_id) media_id = video['sourceId'] return { '_type': 'url_transparent', - 'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id, + 'url': f'http://zonevideo.telequebec.tv/media/{media_id}', 'ie_key': TeleQuebecIE.ie_key(), 'id': media_id, 'title': video.get('titre'), diff --git a/yt_dlp/extractor/teletask.py b/yt_dlp/extractor/teletask.py index fd831f5..050196c 100644 --- a/yt_dlp/extractor/teletask.py +++ 
b/yt_dlp/extractor/teletask.py @@ -20,7 +20,7 @@ class TeleTaskIE(InfoExtractor): 'ext': 'mp4', 'title': 'Duplicate Detection', 'upload_date': '20141218', - } + }, }, { 'md5': 'e1e7218c5f0e4790015a437fcf6c71b4', 'info_dict': { @@ -28,8 +28,8 @@ class TeleTaskIE(InfoExtractor): 'ext': 'mp4', 'title': 'Duplicate Detection', 'upload_date': '20141218', - } - }] + }, + }], } def _real_extract(self, url): @@ -42,7 +42,7 @@ class TeleTaskIE(InfoExtractor): r'Date:</td><td>([^<]+)</td>', webpage, 'date', fatal=False)) entries = [{ - 'id': '%s-%s' % (lecture_id, format_id), + 'id': f'{lecture_id}-{format_id}', 'url': video_url, 'title': title, 'upload_date': upload_date, diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py index 380c84d..b651160 100644 --- a/yt_dlp/extractor/telewebion.py +++ b/yt_dlp/extractor/telewebion.py @@ -72,7 +72,7 @@ class TelewebionIE(InfoExtractor): result = self._download_json('https://graph.telewebion.com/graphql', video_id, note, data=json.dumps({ 'operationName': operation, 'query': f'query {operation}{parameters} @cacheControl(maxAge: 60) {{{query}\n}}\n', - 'variables': {name: value for name, (_, value) in (variables or {}).items()} + 'variables': {name: value for name, (_, value) in (variables or {}).items()}, }, separators=(',', ':')).encode(), headers={ 'Content-Type': 'application/json', 'Accept': 'application/json', diff --git a/yt_dlp/extractor/tempo.py b/yt_dlp/extractor/tempo.py index 71e54eb..4cd16f2 100644 --- a/yt_dlp/extractor/tempo.py +++ b/yt_dlp/extractor/tempo.py @@ -20,8 +20,8 @@ class IVXPlayerIE(InfoExtractor): 'upload_date': '20221204', 'title': 'Film Indonesia di Disney Content Showcase Asia Pacific 2022', 'timestamp': 1670151746, - 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2366065?width=300' - } + 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2366065?width=300', + }, }] _WEBPAGE_TESTS = [{ 'url': 
'https://www.cantika.com/video/31737/film-indonesia-di-disney-content-showcase-asia-pacific-2022', @@ -32,8 +32,8 @@ class IVXPlayerIE(InfoExtractor): 'title': 'Serial Indonesia di Disney Content Showcase Asia Pacific 2022', 'timestamp': 1670639416, 'upload_date': '20221210', - 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2374200?width=300' - } + 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/2374200?width=300', + }, }, { 'url': 'https://www.gooto.com/video/11437/wuling-suv-ramai-dikunjungi-di-giias-2018', 'info_dict': { @@ -44,8 +44,8 @@ class IVXPlayerIE(InfoExtractor): 'description': 'md5:6d901483d0aacc664aecb4489719aafa', 'duration': 75, 'timestamp': 1534011263, - 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/892109?width=300' - } + 'thumbnail': 'https://ivx-image.ivideosmart.com/serve/image/video/892109?width=300', + }, }] @classmethod @@ -56,7 +56,7 @@ class IVXPlayerIE(InfoExtractor): webpage) if mobj: yield f'ivxplayer:{mobj.group("video_id")}:{mobj.group("player_key")}' - raise cls.StopExtraction() + raise cls.StopExtraction def _real_extract(self, url): video_id, player_key = self._match_valid_url(url).group('video_id', 'player_key') @@ -74,7 +74,7 @@ class IVXPlayerIE(InfoExtractor): 'timestamp': parse_iso8601(traverse_obj(json_data, ('ivx', 'published_at'))), 'formats': formats, 'subtitles': subtitles, - 'thumbnail': traverse_obj(json_data, ('ivx', 'thumbnail_url')) + 'thumbnail': traverse_obj(json_data, ('ivx', 'thumbnail_url')), } @@ -93,7 +93,7 @@ class TempoIE(InfoExtractor): 'timestamp': 1658907970, 'upload_date': '20220727', 'tags': ['Anies Baswedan', ' PTUN', ' PTUN | Pengadilan Tata Usaha Negara', ' PTUN Batalkan UMP DKI', ' UMP DKI'], - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py index ae2cb48..fc2b07a 100644 --- a/yt_dlp/extractor/tencent.py +++ b/yt_dlp/extractor/tencent.py @@ -25,7 +25,7 @@ class 
TencentBaseIE(InfoExtractor): if api_response.get('code') != '0.0' and msg is not None: if msg in ( '您所在区域暂无此内容版权(如设置VPN请关闭后重试)', - 'This content is not available in your area due to copyright restrictions. Please choose other videos.' + 'This content is not available in your area due to copyright restrictions. Please choose other videos.', ): self.raise_geo_restricted() raise ExtractorError(f'Tencent said: {msg}') diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py index c1b4a33..197d789 100644 --- a/yt_dlp/extractor/tennistv.py +++ b/yt_dlp/extractor/tennistv.py @@ -47,7 +47,7 @@ class TennisTVIE(InfoExtractor): _HEADERS = { 'origin': 'https://www.tennistv.com', 'referer': 'https://www.tennistv.com/', - 'content-Type': 'application/x-www-form-urlencoded' + 'content-Type': 'application/x-www-form-urlencoded', } def _perform_login(self, username, password): @@ -58,7 +58,7 @@ class TennisTVIE(InfoExtractor): 'redirect_uri': 'https://tennistv.com', 'response_mode': 'fragment', 'response_type': 'code', - 'scope': 'openid' + 'scope': 'openid', }) post_url = self._html_search_regex(r'action=["\']([^"\']+?)["\']\s+method=["\']post["\']', login_page, 'login POST url') @@ -67,7 +67,7 @@ class TennisTVIE(InfoExtractor): headers=self._HEADERS, data=urlencode_postdata({ 'username': username, 'password': password, - 'submitAction': 'Log In' + 'submitAction': 'Log In', })) if 'Your username or password was incorrect' in temp_page: raise ExtractorError('Your username or password was incorrect', expected=True) @@ -82,14 +82,14 @@ class TennisTVIE(InfoExtractor): 'response_type': 'code', 'scope': 'openid', 'nonce': random_uuidv4(), - 'prompt': 'none' + 'prompt': 'none', }) self.get_token(None, { 'code': urllib.parse.parse_qs(handle.url)['code'][-1], 'grant_type': 'authorization_code', 'client_id': 'tennis-tv-web', - 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html' + 'redirect_uri': 
'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html', }) def get_token(self, video_id, payload): @@ -109,7 +109,7 @@ class TennisTVIE(InfoExtractor): self.raise_login_required() self.access_token, self.refresh_token = cookies['access_token'].value, cookies['refresh_token'].value - def _download_session_json(self, video_id, entryid,): + def _download_session_json(self, video_id, entryid): return self._download_json( f'https://atppayments.streamamg.com/api/v1/session/ksession/?lang=en&apijwttoken={self.access_token}&entryId={entryid}', video_id, 'Downloading ksession token', 'Failed to download ksession token', headers=self._HEADERS) @@ -126,7 +126,7 @@ class TennisTVIE(InfoExtractor): self.get_token(video_id, { 'grant_type': 'refresh_token', 'refresh_token': self.refresh_token, - 'client_id': 'tennis-tv-web' + 'client_id': 'tennis-tv-web', }) k_session = self._download_session_json(video_id, entryid).get('KSession') if k_session is None: diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py index 11cc570..d8c556a 100644 --- a/yt_dlp/extractor/tenplay.py +++ b/yt_dlp/extractor/tenplay.py @@ -45,11 +45,11 @@ class TenPlayIE(InfoExtractor): 'timestamp': 1600770600, 'upload_date': '20200922', 'uploader': 'Channel 10', - 'uploader_id': '2199827728001' + 'uploader_id': '2199827728001', }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc', 'only_matching': True, @@ -63,7 +63,7 @@ class TenPlayIE(InfoExtractor): 'MA': 15, 'MA15+': 15, 'R': 18, - 'X': 18 + 'X': 18, } def _get_bearer_token(self, video_id): diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py index 3cf0017..31e3c4d 100644 --- a/yt_dlp/extractor/testurl.py +++ b/yt_dlp/extractor/testurl.py @@ -30,7 +30,7 @@ class TestURLIE(InfoExtractor): ), None) if not extractor: raise ExtractorError( - 'Found multiple matching extractors: %s' % ' 
'.join(ie.IE_NAME for ie in matching_extractors), + 'Found multiple matching extractors: {}'.format(' '.join(ie.IE_NAME for ie in matching_extractors)), expected=True) else: extractor = matching_extractors[0] diff --git a/yt_dlp/extractor/tf1.py b/yt_dlp/extractor/tf1.py index aba4927..5bade3a 100644 --- a/yt_dlp/extractor/tf1.py +++ b/yt_dlp/extractor/tf1.py @@ -43,7 +43,7 @@ class TF1IE(InfoExtractor): 'season': 'Season 3', 'tags': 'count:13', 'episode': 'Episode 21', - 'duration': 2312 + 'duration': 2312, }, 'params': {'skip_download': 'm3u8'}, }, { @@ -62,7 +62,7 @@ class TF1IE(InfoExtractor): 'variables': json.dumps({ 'programSlug': program_slug, 'slug': slug, - }) + }), })['data']['videoBySlug'] wat_id = video['streamId'] diff --git a/yt_dlp/extractor/tfo.py b/yt_dlp/extractor/tfo.py index d417f50..0d1b252 100644 --- a/yt_dlp/extractor/tfo.py +++ b/yt_dlp/extractor/tfo.py @@ -16,7 +16,7 @@ class TFOIE(InfoExtractor): 'ext': 'mp4', 'title': 'Video Game Hackathon', 'description': 'md5:558afeba217c6c8d96c60e5421795c07', - } + }, } def _real_extract(self, url): @@ -31,7 +31,7 @@ class TFOIE(InfoExtractor): if infos.get('success') == 0: if infos.get('code') == 'ErrGeoBlocked': self.raise_geo_restricted(countries=self._GEO_COUNTRIES) - raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(infos['msg'])), expected=True) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, clean_html(infos['msg'])), expected=True) video_data = infos['data'] return { diff --git a/yt_dlp/extractor/theguardian.py b/yt_dlp/extractor/theguardian.py index fb64077..a9e4990 100644 --- a/yt_dlp/extractor/theguardian.py +++ b/yt_dlp/extractor/theguardian.py @@ -26,8 +26,8 @@ class TheGuardianPodcastIE(InfoExtractor): 'description': 'md5:cfd3df2791d394d2ab62cd571d5207ee', 'creator': 'Stephen Buranyi', 'thumbnail': 'md5:73c12558fcb3b0e2a59422bfb33b3f79', - 'release_date': '20231103' - } + 'release_date': '20231103', + }, }, { 'url': 
'https://www.theguardian.com/news/audio/2023/oct/30/the-trials-of-robert-habeck-is-the-worlds-most-powerful-green-politician-doomed-to-fail-podcast', 'md5': 'd1771744681789b4cd7da2a08e487702', @@ -38,8 +38,8 @@ class TheGuardianPodcastIE(InfoExtractor): 'description': 'md5:1b5cf6582d1771c6b7077784b5456994', 'creator': 'Philip Oltermann', 'thumbnail': 'md5:6e5c5ec43843e956e20be793722e9080', - 'release_date': '20231030' - } + 'release_date': '20231030', + }, }, { 'url': 'https://www.theguardian.com/football/audio/2023/nov/06/arsenal-feel-hard-done-by-and-luton-hold-liverpool-football-weekly', 'md5': 'a2fcff6f8e060a95b1483295273dc35e', @@ -50,8 +50,8 @@ class TheGuardianPodcastIE(InfoExtractor): 'description': 'md5:286a9fbddaeb7c83cc65d1c4a5330b2a', 'creator': 'Max Rushden', 'thumbnail': 'md5:93eb7d6440f1bb94eb3a6cad63f48afd', - 'release_date': '20231106' - } + 'release_date': '20231106', + }, }, { 'url': 'https://www.theguardian.com/politics/audio/2023/nov/02/the-covid-inquiry-politics-weekly-uk-podcast', 'md5': '06a0f7e9701a80c8064a5d35690481ec', @@ -62,8 +62,8 @@ class TheGuardianPodcastIE(InfoExtractor): 'description': 'md5:207c98859c14903582b17d25b014046e', 'creator': 'Gaby Hinsliff', 'thumbnail': 'md5:28932a7b5a25b057be330d2ed70ea7f3', - 'release_date': '20231102' - } + 'release_date': '20231102', + }, }] def _real_extract(self, url): @@ -88,25 +88,25 @@ class TheGuardianPodcastPlaylistIE(InfoExtractor): 'info_dict': { 'id': 'theguardianswomensfootballweekly', 'title': "The Guardian's Women's Football Weekly", - 'description': 'md5:e2cc021311e582d29935a73614a43f51' + 'description': 'md5:e2cc021311e582d29935a73614a43f51', }, - 'playlist_mincount': 69 + 'playlist_mincount': 69, }, { 'url': 'https://www.theguardian.com/news/series/todayinfocus?page=2', 'info_dict': { 'id': 'todayinfocus', 'title': 'Today in Focus', - 'description': 'md5:0f097764fc0d359e0b6eb537be0387e2' + 'description': 'md5:0f097764fc0d359e0b6eb537be0387e2', }, - 'playlist_mincount': 1261 + 
'playlist_mincount': 1261, }, { 'url': 'https://www.theguardian.com/news/series/the-audio-long-read', 'info_dict': { 'id': 'the-audio-long-read', 'title': 'The Audio Long Read', - 'description': 'md5:5462994a27527309562b25b6defc4ef3' + 'description': 'md5:5462994a27527309562b25b6defc4ef3', }, - 'playlist_mincount': 996 + 'playlist_mincount': 996, }] def _entries(self, url, playlist_id): @@ -117,8 +117,7 @@ class TheGuardianPodcastPlaylistIE(InfoExtractor): break episodes = get_elements_html_by_class('fc-item--type-media', webpage) - for url_path in traverse_obj(episodes, (..., {extract_attributes}, 'data-id')): - yield url_path + yield from traverse_obj(episodes, (..., {extract_attributes}, 'data-id')) def _real_extract(self, url): podcast_id = self._match_id(url) diff --git a/yt_dlp/extractor/theholetv.py b/yt_dlp/extractor/theholetv.py index a13f83b..a3a7024 100644 --- a/yt_dlp/extractor/theholetv.py +++ b/yt_dlp/extractor/theholetv.py @@ -12,8 +12,8 @@ class TheHoleTvIE(InfoExtractor): 'ext': 'mp4', 'title': 'Сергей Орлов — Громкий вопрос', 'thumbnail': 'https://assets-cdn.the-hole.tv/images/t8gan4n6zn627e7wni11b2uemqts', - 'description': 'md5:45741a9202331f995d9fb76996759379' - } + 'description': 'md5:45741a9202331f995d9fb76996759379', + }, }] def _real_extract(self, url): @@ -31,5 +31,5 @@ class TheHoleTvIE(InfoExtractor): 'description': self._og_search_description(webpage), 'thumbnail': player_attrs.get('data-player-poster-value'), 'formats': formats, - 'subtitles': subtitles + 'subtitles': subtitles, } diff --git a/yt_dlp/extractor/theintercept.py b/yt_dlp/extractor/theintercept.py index 99f0d42..dcdca8f 100644 --- a/yt_dlp/extractor/theintercept.py +++ b/yt_dlp/extractor/theintercept.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ -20,7 +19,7 @@ class TheInterceptIE(InfoExtractor): 'timestamp': 1450429239, 'upload_date': '20151218', 'comment_count': int, - } + }, }] 
def _real_extract(self, url): @@ -35,8 +34,8 @@ class TheInterceptIE(InfoExtractor): if post['slug'] == display_id: return { '_type': 'url_transparent', - 'url': 'jwplatform:%s' % post['fov_videoid'], - 'id': compat_str(post['ID']), + 'url': 'jwplatform:{}'.format(post['fov_videoid']), + 'id': str(post['ID']), 'display_id': display_id, 'title': post['title'], 'description': post.get('excerpt'), diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py index eeb33a6..7c1769c 100644 --- a/yt_dlp/extractor/theplatform.py +++ b/yt_dlp/extractor/theplatform.py @@ -1,4 +1,3 @@ -import binascii import hashlib import hmac import re @@ -42,8 +41,7 @@ class ThePlatformBaseIE(OnceIE): if exception.get('value') == 'GeoLocationBlocked': self.raise_geo_restricted(error_element.attrib['abstract']) elif error_element.attrib['src'].startswith( - 'http://link.theplatform.%s/s/errorFiles/Unavailable.' - % self._TP_TLD): + f'http://link.theplatform.{self._TP_TLD}/s/errorFiles/Unavailable.'): raise ExtractorError( error_element.attrib['abstract'], expected=True) @@ -70,7 +68,7 @@ class ThePlatformBaseIE(OnceIE): return formats, subtitles def _download_theplatform_metadata(self, path, video_id): - info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path) + info_url = f'http://link.theplatform.{self._TP_TLD}/s/{path}?format=preview' return self._download_json(info_url, video_id) def _parse_theplatform_metadata(self, info): @@ -140,7 +138,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): <meta\s+ property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+ content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2''', - r'(?s)<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//player\.theplatform\.com/p/.+?)\1' + r'(?s)<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//player\.theplatform\.com/p/.+?)\1', ] _TESTS = [{ @@ -225,17 +223,14 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): flags = '10' if 
include_qs else '00' expiration_date = '%x' % (int(time.time()) + life) - def str_to_hex(str): - return binascii.b2a_hex(str.encode('ascii')).decode('ascii') - - def hex_to_bytes(hex): - return binascii.a2b_hex(hex.encode('ascii')) + def str_to_hex(str_data): + return str_data.encode('ascii').hex() relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1) - clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path)) + clear_text = bytes.fromhex(flags + expiration_date + str_to_hex(relative_path)) checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() sig = flags + expiration_date + checksum + str_to_hex(sig_secret) - return '%s&sig=%s' % (url, sig) + return f'{url}&sig={sig}' def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -274,7 +269,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): break if feed_id is None: raise ExtractorError('Unable to find feed id') - return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % ( + return self.url_result('http://feed.theplatform.com/f/{}/{}?byGuid={}'.format( provider_id, feed_id, qs_dict['guid'][0])) if smuggled_data.get('force_smil_url', False): @@ -298,13 +293,10 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): config_url = config_url.replace('swf/', 'config/') config_url = config_url.replace('onsite/', 'onsite/config/') config = self._download_json(config_url, video_id, 'Downloading config') - if 'releaseUrl' in config: - release_url = config['releaseUrl'] - else: - release_url = 'http://link.theplatform.com/s/%s?mbr=true' % path + release_url = config.get('releaseUrl') or f'http://link.theplatform.com/s/{path}?mbr=true' smil_url = release_url + '&formats=MPEG4&manifest=f4m' else: - smil_url = 'http://link.theplatform.com/s/%s?mbr=true' % path + smil_url = f'http://link.theplatform.com/s/{path}?mbr=true' sig = smuggled_data.get('sig') if sig: @@ -387,7 +379,7 @@ class 
ThePlatformFeedIE(ThePlatformBaseIE): if asset_type in asset_types_query: query.update(asset_types_query[asset_type]) cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query( - main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type) + main_smil_url or smil_url, query), video_id, f'Downloading SMIL data for {asset_type}') formats.extend(cur_formats) subtitles = self._merge_subtitles(subtitles, cur_subtitles) @@ -400,7 +392,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE): timestamp = int_or_none(entry.get('media$availableDate'), scale=1000) categories = [item['media$name'] for item in entry.get('media$categories', [])] - ret = self._extract_theplatform_metadata('%s/%s' % (provider_id, first_video_id), video_id) + ret = self._extract_theplatform_metadata(f'{provider_id}/{first_video_id}', video_id) subtitles = self._merge_subtitles(subtitles, ret['subtitles']) ret.update({ 'id': video_id, diff --git a/yt_dlp/extractor/thestar.py b/yt_dlp/extractor/thestar.py index 293c34c..38aa695 100644 --- a/yt_dlp/extractor/thestar.py +++ b/yt_dlp/extractor/thestar.py @@ -18,7 +18,7 @@ class TheStarIE(InfoExtractor): 'params': { # m3u8 download 'skip_download': True, - } + }, } BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/794267642001/default_default/index.html?videoId=%s' diff --git a/yt_dlp/extractor/theweatherchannel.py b/yt_dlp/extractor/theweatherchannel.py index d1921e4..424f778 100644 --- a/yt_dlp/extractor/theweatherchannel.py +++ b/yt_dlp/extractor/theweatherchannel.py @@ -24,7 +24,7 @@ class TheWeatherChannelIE(ThePlatformIE): # XXX: Do not subclass from concrete 'timestamp': 1689967343, 'display_id': 'invest-95l-in-atlantic-has-a-medium-chance-of-development', 'duration': 34.0, - } + }, }, { 'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india', 'only_matching': True, @@ -34,7 +34,7 @@ class TheWeatherChannelIE(ThePlatformIE): # XXX: Do not subclass from 
concrete asset_name, locale, display_id = self._match_valid_url(url).groups() if not locale: locale = 'en-US' - video_data = list(self._download_json( + video_data = next(iter(self._download_json( 'https://weather.com/api/v1/p/redux-dal', display_id, data=json.dumps([{ 'name': 'getCMSAssetsUrlConfig', 'params': { @@ -44,10 +44,10 @@ class TheWeatherChannelIE(ThePlatformIE): # XXX: Do not subclass from concrete '$in': asset_name, }, }, - } + }, }]).encode(), headers={ 'Content-Type': 'application/json', - })['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0] + })['dal']['getCMSAssetsUrlConfig'].values()))['data'][0] video_id = video_data['id'] seo_meta = video_data.get('seometa', {}) title = video_data.get('title') or seo_meta['title'] diff --git a/yt_dlp/extractor/thisamericanlife.py b/yt_dlp/extractor/thisamericanlife.py index 9a3d798..8b5d1e6 100644 --- a/yt_dlp/extractor/thisamericanlife.py +++ b/yt_dlp/extractor/thisamericanlife.py @@ -22,11 +22,11 @@ class ThisAmericanLifeIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.thisamericanlife.org/radio-archives/episode/%s' % video_id, video_id) + f'http://www.thisamericanlife.org/radio-archives/episode/{video_id}', video_id) return { 'id': video_id, - 'url': 'http://stream.thisamericanlife.org/{0}/stream/{0}_64k.m3u8'.format(video_id), + 'url': f'http://stream.thisamericanlife.org/{video_id}/stream/{video_id}_64k.m3u8', 'protocol': 'm3u8_native', 'ext': 'm4a', 'acodec': 'aac', diff --git a/yt_dlp/extractor/thisvid.py b/yt_dlp/extractor/thisvid.py index 04b0838..2fd6ae8 100644 --- a/yt_dlp/extractor/thisvid.py +++ b/yt_dlp/extractor/thisvid.py @@ -27,7 +27,7 @@ class ThisVidIE(InfoExtractor): 'uploader': 'jeanslevisjeans', 'display_id': 'sitting-on-ball-tight-jeans', 'age_limit': 18, - } + }, }, { 'url': 'https://thisvid.com/embed/3533241/', 'md5': '839becb572995687e11a69dc4358a386', @@ -40,7 +40,7 @@ class ThisVidIE(InfoExtractor): 'uploader': 'jeanslevisjeans', 
'display_id': 'sitting-on-ball-tight-jeans', 'age_limit': 18, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py index f7a13d2..45fdef9 100644 --- a/yt_dlp/extractor/threeqsdn.py +++ b/yt_dlp/extractor/threeqsdn.py @@ -14,7 +14,7 @@ class ThreeQSDNIE(InfoExtractor): IE_NAME = '3qsdn' IE_DESC = '3Q SDN' _VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' - _EMBED_REGEX = [r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % _VALID_URL] + _EMBED_REGEX = [rf'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>{_VALID_URL}.*?)\1'] _TESTS = [{ # https://player.3qsdn.com/demo.html 'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be', @@ -121,7 +121,7 @@ class ThreeQSDNIE(InfoExtractor): height = int_or_none(s.get('height')) formats.append({ 'ext': ext, - 'format_id': join_nonempty('http', ext, height and '%dp' % height), + 'format_id': join_nonempty('http', ext, height and f'{height}p'), 'height': height, 'source_preference': 0, 'url': src, @@ -152,5 +152,5 @@ class ThreeQSDNIE(InfoExtractor): # It seems like this would be correctly handled by default # However, unless someone can confirm this, the old # behaviour is being kept as-is - '_format_sort_fields': ('res', 'source_preference') + '_format_sort_fields': ('res', 'source_preference'), } diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py index dbd5090..32ea177 100644 --- a/yt_dlp/extractor/threespeak.py +++ b/yt_dlp/extractor/threespeak.py @@ -24,32 +24,32 @@ class ThreeSpeakIE(InfoExtractor): 'duration': 2703.867833, 'filesize': 1620054781, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) json_str = 
self._html_search_regex(r'JSON\.parse\(\'([^\']+)\'\)', webpage, 'json') # The json string itself is escaped. Hence the double parsing - data_json = self._parse_json(self._parse_json(f'"{json_str}"', id), id) - video_json = self._parse_json(data_json['json_metadata'], id) + data_json = self._parse_json(self._parse_json(f'"{json_str}"', video_id), video_id) + video_json = self._parse_json(data_json['json_metadata'], video_id) formats, subtitles = [], {} og_m3u8 = self._html_search_regex(r'<meta\s?property=\"ogvideo\"\s?content=\"([^\"]+)\">', webpage, 'og m3u8', fatal=False) if og_m3u8: - https_frmts, https_subs = self._extract_m3u8_formats_and_subtitles(og_m3u8, id, fatal=False, m3u8_id='https') + https_frmts, https_subs = self._extract_m3u8_formats_and_subtitles(og_m3u8, video_id, fatal=False, m3u8_id='https') formats.extend(https_frmts) subtitles = self._merge_subtitles(subtitles, https_subs) ipfs_m3u8 = try_get(video_json, lambda x: x['video']['info']['ipfs']) if ipfs_m3u8: - ipfs_frmts, ipfs_subs = self._extract_m3u8_formats_and_subtitles(f'https://ipfs.3speak.tv/ipfs/{ipfs_m3u8}', - id, fatal=False, m3u8_id='ipfs') + ipfs_frmts, ipfs_subs = self._extract_m3u8_formats_and_subtitles( + f'https://ipfs.3speak.tv/ipfs/{ipfs_m3u8}', video_id, fatal=False, m3u8_id='ipfs') formats.extend(ipfs_frmts) subtitles = self._merge_subtitles(subtitles, ipfs_subs) mp4_file = try_get(video_json, lambda x: x['video']['info']['file']) if mp4_file: formats.append({ - 'url': f'https://threespeakvideo.b-cdn.net/{id}/{mp4_file}', + 'url': f'https://threespeakvideo.b-cdn.net/{video_id}/{mp4_file}', 'ext': 'mp4', 'format_id': 'https-mp4', 'duration': try_get(video_json, lambda x: x['video']['info']['duration']), @@ -58,7 +58,7 @@ class ThreeSpeakIE(InfoExtractor): 'format_note': 'Original file', }) return { - 'id': id, + 'id': video_id, 'title': data_json.get('title') or data_json.get('root_title'), 'uploader': data_json.get('author'), 'description': try_get(video_json, lambda x: 
x['video']['content']['description']), @@ -82,12 +82,12 @@ class ThreeSpeakUserIE(InfoExtractor): }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) entries = [ self.url_result( - 'https://3speak.tv/watch?v=%s' % video, + f'https://3speak.tv/watch?v={video}', ie=ThreeSpeakIE.ie_key()) for video in re.findall(r'data-payout\s?\=\s?\"([^\"]+)\"', webpage) if video ] - return self.playlist_result(entries, id) + return self.playlist_result(entries, playlist_id) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 7bcfded..c3505b1 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -5,10 +5,10 @@ import random import re import string import time +import urllib.parse import uuid from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse from ..networking import HEADRequest from ..utils import ( ExtractorError, @@ -30,6 +30,7 @@ from ..utils import ( try_call, try_get, url_or_none, + urlencode_postdata, ) @@ -43,8 +44,8 @@ class TikTokBaseIE(InfoExtractor): 'iid': None, # TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme 'app_name': 'musical_ly', - 'app_version': '34.1.2', - 'manifest_app_version': '2023401020', + 'app_version': '35.1.3', + 'manifest_app_version': '2023501030', # "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0 'aid': '0', } @@ -114,18 +115,19 @@ class TikTokBaseIE(InfoExtractor): 'universal data', display_id, end_pattern=r'</script>', default={}), ('__DEFAULT_SCOPE__', {dict})) or {} - def _call_api_impl(self, ep, query, video_id, fatal=True, + def _call_api_impl(self, ep, video_id, query=None, data=None, headers=None, fatal=True, note='Downloading API JSON', errnote='Unable to download API page'): self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', 
k=160))) webpage_cookies = self._get_cookies(self._WEBPAGE_HOST) if webpage_cookies.get('sid_tt'): self._set_cookie(self._API_HOSTNAME, 'sid_tt', webpage_cookies['sid_tt'].value) return self._download_json( - 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id, + f'https://{self._API_HOSTNAME}/aweme/v1/{ep}/', video_id=video_id, fatal=fatal, note=note, errnote=errnote, headers={ 'User-Agent': self._APP_USER_AGENT, 'Accept': 'application/json', - }, query=query) + **(headers or {}), + }, query=query, data=data) def _build_api_query(self, query): return filter_dict({ @@ -138,7 +140,7 @@ class TikTokBaseIE(InfoExtractor): 'channel': 'googleplay', 'aid': self._APP_INFO['aid'], 'app_name': self._APP_INFO['app_name'], - 'version_code': ''.join((f'{int(v):02d}' for v in self._APP_INFO['app_version'].split('.'))), + 'version_code': ''.join(f'{int(v):02d}' for v in self._APP_INFO['app_version'].split('.')), 'version_name': self._APP_INFO['app_version'], 'manifest_version_code': self._APP_INFO['manifest_app_version'], 'update_version_code': self._APP_INFO['manifest_app_version'], @@ -174,7 +176,7 @@ class TikTokBaseIE(InfoExtractor): 'openudid': ''.join(random.choices('0123456789abcdef', k=16)), }) - def _call_api(self, ep, query, video_id, fatal=True, + def _call_api(self, ep, video_id, query=None, data=None, headers=None, fatal=True, note='Downloading API JSON', errnote='Unable to download API page'): if not self._APP_INFO and not self._get_next_app_info(): message = 'No working app info is available' @@ -187,9 +189,11 @@ class TikTokBaseIE(InfoExtractor): max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO for count in itertools.count(1): self.write_debug(str(self._APP_INFO)) - real_query = self._build_api_query(query) + real_query = self._build_api_query(query or {}) try: - return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote) + return self._call_api_impl( + ep, video_id, query=real_query, data=data, 
headers=headers, + fatal=fatal, note=note, errnote=errnote) except ExtractorError as e: if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: message = str(e.cause or e.msg) @@ -204,17 +208,29 @@ class TikTokBaseIE(InfoExtractor): raise def _extract_aweme_app(self, aweme_id): - feed_list = self._call_api( - 'feed', {'aweme_id': aweme_id}, aweme_id, note='Downloading video feed', - errnote='Unable to download video feed').get('aweme_list') or [] - aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None) + aweme_detail = traverse_obj( + self._call_api('multi/aweme/detail', aweme_id, data=urlencode_postdata({ + 'aweme_ids': f'[{aweme_id}]', + 'request_source': '0', + }), headers={'X-Argus': ''}), ('aweme_details', 0, {dict})) if not aweme_detail: - raise ExtractorError('Unable to find video in feed', video_id=aweme_id) + raise ExtractorError('Unable to extract aweme detail info', video_id=aweme_id) return self._parse_aweme_video_app(aweme_detail) def _extract_web_data_and_status(self, url, video_id, fatal=True): - webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=fatal) or '' - video_data, status = {}, None + video_data, status = {}, -1 + + res = self._download_webpage_handle(url, video_id, fatal=fatal, headers={'User-Agent': 'Mozilla/5.0'}) + if res is False: + return video_data, status + + webpage, urlh = res + if urllib.parse.urlparse(urlh.url).path == '/login': + message = 'TikTok is requiring login for access to this content' + if fatal: + self.raise_login_required(message) + self.report_warning(f'{message}. 
{self._login_hint()}') + return video_data, status if universal_data := self._get_universal_data(webpage, video_id): self.write_debug('Found universal data for rehydration') @@ -254,7 +270,7 @@ class TikTokBaseIE(InfoExtractor): 'ext': 'srt', 'data': '\n\n'.join( f'{i + 1}\n{srt_subtitles_timecode(line["start_time"] / 1000)} --> {srt_subtitles_timecode(line["end_time"] / 1000)}\n{line["text"]}' - for i, line in enumerate(caption_json['utterances']) if line.get('text')) + for i, line in enumerate(caption_json['utterances']) if line.get('text')), }) # feed endpoint subs if not subtitles: @@ -382,7 +398,7 @@ class TikTokBaseIE(InfoExtractor): auth_cookie = self._get_cookies(self._WEBPAGE_HOST).get('sid_tt') if auth_cookie: for f in formats: - self._set_cookie(compat_urllib_parse_urlparse(f['url']).hostname, 'sid_tt', auth_cookie.value) + self._set_cookie(urllib.parse.urlparse(f['url']).hostname, 'sid_tt', auth_cookie.value) thumbnails = [] for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak', @@ -402,7 +418,7 @@ class TikTokBaseIE(InfoExtractor): contained_music_author = traverse_obj( music_info, ('matched_song', 'author'), ('matched_pgc_sound', 'author'), 'author', expected_type=str) - is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - %s' % music_info.get('owner_handle') + is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - {}'.format(music_info.get('owner_handle')) if is_generic_og_trackname: music_track, music_author = contained_music_track or 'original sound', contained_music_author else: @@ -792,7 +808,7 @@ class TikTokIE(TikTokBaseIE): 'expected_warnings': ['Unable to find video in feed'], }, { # 1080p format - 'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830', # FIXME + 'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830', # FIXME: Web can only get audio 'md5': 
'982512017a8a917124d5a08c8ae79621', 'info_dict': { 'id': '7107337212743830830', @@ -846,7 +862,7 @@ class TikTokIE(TikTokBaseIE): }, { # Auto-captions available 'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -1026,7 +1042,8 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul for retry in self.RetryManager(): try: post_list = self._call_api( - self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}', + self._API_ENDPOINT, display_id, query=query, + note=f'Downloading video list page {page}', errnote='Unable to download video list') except ExtractorError as e: if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: @@ -1059,17 +1076,17 @@ class TikTokSoundIE(TikTokBaseListIE): 'url': 'https://www.tiktok.com/music/Build-a-Btch-6956990112127585029?lang=en', 'playlist_mincount': 100, 'info_dict': { - 'id': '6956990112127585029' + 'id': '6956990112127585029', }, - 'expected_warnings': ['Retrying'] + 'expected_warnings': ['Retrying'], }, { # Actual entries are less than listed video count 'url': 'https://www.tiktok.com/music/jiefei-soap-remix-7036843036118469381', 'playlist_mincount': 2182, 'info_dict': { - 'id': '7036843036118469381' + 'id': '7036843036118469381', }, - 'expected_warnings': ['Retrying'] + 'expected_warnings': ['Retrying'], }] @@ -1085,11 +1102,11 @@ class TikTokEffectIE(TikTokBaseListIE): 'info_dict': { 'id': '1258156', }, - 'expected_warnings': ['Retrying'] + 'expected_warnings': ['Retrying'], }, { # Different entries between mobile and web, depending on region 'url': 'https://www.tiktok.com/sticker/Elf-Friend-479565', - 'only_matching': True + 'only_matching': True, }] @@ -1106,16 +1123,16 @@ class TikTokTagIE(TikTokBaseListIE): 'id': '46294678', 'title': 'hello2018', }, - 'expected_warnings': ['Retrying'] + 'expected_warnings': ['Retrying'], }, { 'url': 
'https://tiktok.com/tag/fypシ?is_copy_url=0&is_from_webapp=v1', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id, headers={ - 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' + 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)', }) tag_id = self._html_search_regex(r'snssdk\d*://challenge/detail/(\d+)', webpage, 'tag ID') return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id) @@ -1129,17 +1146,17 @@ class TikTokCollectionIE(TikTokBaseIE): 'url': 'https://www.tiktok.com/@imanoreotwe/collection/count-test-7371330159376370462', 'info_dict': { 'id': '7371330159376370462', - 'title': 'imanoreotwe-count-test' + 'title': 'imanoreotwe-count-test', }, - 'playlist_count': 9 + 'playlist_count': 9, }, { # tests returning multiple pages of a large collection 'url': 'https://www.tiktok.com/@imanoreotwe/collection/%F0%9F%98%82-7111887189571160875', 'info_dict': { 'id': '7111887189571160875', - 'title': 'imanoreotwe-%F0%9F%98%82' + 'title': 'imanoreotwe-%F0%9F%98%82', }, - 'playlist_mincount': 100 + 'playlist_mincount': 100, }] _API_BASE_URL = 'https://www.tiktok.com/api/collection/item_list/' _PAGE_COUNT = 30 diff --git a/yt_dlp/extractor/tmz.py b/yt_dlp/extractor/tmz.py index edd16bc..c9f23a8 100644 --- a/yt_dlp/extractor/tmz.py +++ b/yt_dlp/extractor/tmz.py @@ -174,8 +174,7 @@ class TMZIE(InfoExtractor): # see https://developers.google.com/youtube/iframe_api_reference#Video_Queueing_Functions match_obj = re.search(r'\.cueVideoById\(\s*(?P<quote>[\'"])(?P<id>.*?)(?P=quote)', webpage) if match_obj: - res = self.url_result(match_obj.group('id')) - return res + return self.url_result(match_obj.group('id')) # try to extract from twitter blockquote_el = get_element_by_attribute('class', 'twitter-tweet', webpage) if blockquote_el: @@ -185,8 +184,7 @@ class 
TMZIE(InfoExtractor): if matches: for _, match in matches: if '/status/' in match: - res = self.url_result(match) - return res + return self.url_result(match) raise ExtractorError('No video found!') if id not in jsonld: jsonld['id'] = url diff --git a/yt_dlp/extractor/tnaflix.py b/yt_dlp/extractor/tnaflix.py index 535e6c8..22832e0 100644 --- a/yt_dlp/extractor/tnaflix.py +++ b/yt_dlp/extractor/tnaflix.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( fix_xml_ampersands, float_or_none, @@ -64,7 +63,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor): height = int_or_none(xpath_text(timeline, './imageHeight', 'thumbnail height')) return [{ - 'url': self._proto_relative_url(pattern_el.text.replace('#', compat_str(i)), 'http:'), + 'url': self._proto_relative_url(pattern_el.text.replace('#', str(i)), 'http:'), 'width': width, 'height': height, } for i in range(first, last + 1)] @@ -138,7 +137,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor): thumbnails = self._extract_thumbnails(cfg_xml) or [] thumbnails.append({ - 'url': self._proto_relative_url(xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:') + 'url': self._proto_relative_url(xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:'), }) # check for EMPFlix-style JSON and extract @@ -239,7 +238,7 @@ class TNAFlixIE(TNAEMPFlixBaseIE): 'duration': 91, 'age_limit': 18, 'categories': list, - } + }, }, { # non-anonymous uploader, categories 'url': 'https://www.tnaflix.com/teen-porn/Educational-xxx-video/video6538', @@ -255,7 +254,7 @@ class TNAFlixIE(TNAEMPFlixBaseIE): 'age_limit': 18, 'uploader': 'bobwhite39', 'categories': list, - } + }, }, { 'url': 'https://www.tnaflix.com/amateur-porn/bunzHD-Ms.Donk/video358632', 'only_matching': True, @@ -278,7 +277,7 @@ class EMPFlixIE(TNAEMPFlixBaseIE): 'duration': 83, 'age_limit': 18, 'categories': list, - } + }, }, { 'url': 'http://www.empflix.com/videos/[AROMA][ARMD-718]-Aoi-Yoshino-Sawa-25826.html', 
'only_matching': True, @@ -313,7 +312,7 @@ class MovieFapIE(TNAFlixNetworkBaseIE): 'comment_count': int, 'average_rating': float, 'categories': ['Amateur', 'Masturbation', 'Mature', 'Flashing'], - } + }, }, { # quirky single-format case where the extension is given as fid, but the video is really an flv 'url': 'http://www.moviefap.com/videos/e5da0d3edce5404418f5/jeune-couple-russe.html', diff --git a/yt_dlp/extractor/toggle.py b/yt_dlp/extractor/toggle.py index 7073733..de2e03f 100644 --- a/yt_dlp/extractor/toggle.py +++ b/yt_dlp/extractor/toggle.py @@ -26,7 +26,7 @@ class ToggleIE(InfoExtractor): }, 'params': { 'skip_download': 'm3u8 download', - } + }, }, { 'note': 'DRM-protected video', 'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413', @@ -40,7 +40,7 @@ class ToggleIE(InfoExtractor): }, 'params': { 'skip_download': 'DRM-protected wvm download', - } + }, }, { # this also tests correct video id extraction 'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay', @@ -56,7 +56,7 @@ class ToggleIE(InfoExtractor): 'params': { 'skip_download': 'DRM-protected wvm download', }, - 'skip': 'm3u8 links are geo-restricted' + 'skip': 'm3u8 links are geo-restricted', }, { 'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331', 'only_matching': True, @@ -92,14 +92,14 @@ class ToggleIE(InfoExtractor): 'LocaleLanguage': '', 'LocaleCountry': '', 'LocaleDevice': '', - 'LocaleUserState': 0 + 'LocaleUserState': 0, }, 'Platform': 0, 'SiteGuid': 0, 'DomainID': '0', 'UDID': '', 'ApiUser': self._API_USER, - 'ApiPass': self._API_PASS + 'ApiPass': self._API_PASS, }, 'MediaID': video_id, 'mediaType': 0, @@ -107,7 +107,7 @@ class ToggleIE(InfoExtractor): info = self._download_json( 'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo', - video_id, 'Downloading video info json', data=json.dumps(params).encode('utf-8')) + video_id, 'Downloading video info json', 
data=json.dumps(params).encode()) title = info['MediaName'] @@ -122,8 +122,8 @@ class ToggleIE(InfoExtractor): if ext == 'm3u8': m3u8_formats = self._extract_m3u8_formats( video_url, video_id, ext='mp4', m3u8_id=vid_format, - note='Downloading %s m3u8 information' % vid_format, - errnote='Failed to download %s m3u8 information' % vid_format, + note=f'Downloading {vid_format} m3u8 information', + errnote=f'Failed to download {vid_format} m3u8 information', fatal=False) for f in m3u8_formats: # Apple FairPlay Streaming @@ -133,14 +133,14 @@ class ToggleIE(InfoExtractor): elif ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id=vid_format, - note='Downloading %s MPD manifest' % vid_format, - errnote='Failed to download %s MPD manifest' % vid_format, + note=f'Downloading {vid_format} MPD manifest', + errnote=f'Failed to download {vid_format} MPD manifest', fatal=False)) elif ext == 'ism': formats.extend(self._extract_ism_formats( video_url, video_id, ism_id=vid_format, - note='Downloading %s ISM manifest' % vid_format, - errnote='Failed to download %s ISM manifest' % vid_format, + note=f'Downloading {vid_format} ISM manifest', + errnote=f'Failed to download {vid_format} ISM manifest', fatal=False)) elif ext == 'mp4': formats.append({ diff --git a/yt_dlp/extractor/tonline.py b/yt_dlp/extractor/tonline.py index 33b9a32..cfbd36b 100644 --- a/yt_dlp/extractor/tonline.py +++ b/yt_dlp/extractor/tonline.py @@ -15,13 +15,13 @@ class TOnlineIE(InfoExtractor): 'ext': 'mp4', 'title': 'Drittes Remis! Zidane: "Es muss etwas passieren"', 'description': 'Es läuft nicht rund bei Real Madrid. 
Das 1:1 gegen den SD Eibar war das dritte Unentschieden in Folge in der Liga.', - } + }, } def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - 'http://www.t-online.de/tv/id_%s/tid_json_video' % video_id, video_id) + f'http://www.t-online.de/tv/id_{video_id}/tid_json_video', video_id) title = video_data['subtitle'] formats = [] diff --git a/yt_dlp/extractor/toongoggles.py b/yt_dlp/extractor/toongoggles.py index 1b8fc3a..bfeb16a 100644 --- a/yt_dlp/extractor/toongoggles.py +++ b/yt_dlp/extractor/toongoggles.py @@ -18,7 +18,7 @@ class ToonGogglesIE(InfoExtractor): 'description': 'Bernard decides to play football in order to be better than Lloyd and tries to beat him no matter how, he even cheats.', 'upload_date': '20160718', 'timestamp': 1468879330, - } + }, }, { 'url': 'http://www.toongoggles.com/shows/227759/om-nom-stories-around-the-world', 'info_dict': { diff --git a/yt_dlp/extractor/toutv.py b/yt_dlp/extractor/toutv.py index ced1224..cbd2c9c 100644 --- a/yt_dlp/extractor/toutv.py +++ b/yt_dlp/extractor/toutv.py @@ -61,7 +61,7 @@ class TouTvIE(RadioCanadaIE): # XXX: Do not subclass from concrete IE def _real_extract(self, url): path = self._match_id(url) metadata = self._download_json( - 'https://services.radio-canada.ca/toutv/presentation/%s' % path, path, query={ + f'https://services.radio-canada.ca/toutv/presentation/{path}', path, query={ 'client_key': self._CLIENT_KEY, 'device': 'web', 'version': 4, diff --git a/yt_dlp/extractor/toypics.py b/yt_dlp/extractor/toypics.py index ccb2ef8..5b625a3 100644 --- a/yt_dlp/extractor/toypics.py +++ b/yt_dlp/extractor/toypics.py @@ -16,7 +16,7 @@ class ToypicsIE(InfoExtractor): 'title': "Chance-Bulge'd, 2", 'age_limit': 18, 'uploader': 'kidsune', - } + }, } def _real_extract(self, url): @@ -70,10 +70,10 @@ class ToypicsUserIE(InfoExtractor): urls = [] page_count = (video_count + PAGE_SIZE + 1) // PAGE_SIZE for n in range(1, page_count + 1): - lpage_url = url + '/public/%d' % n + 
lpage_url = url + f'/public/{n}' lpage = self._download_webpage( lpage_url, username, - note='Downloading page %d/%d' % (n, page_count)) + note=f'Downloading page {n}/{page_count}') urls.extend( re.findall( r'<div[^>]+class=["\']preview[^>]+>\s*<a[^>]+href="(https?://videos\.toypics\.net/view/[^"]+)"', @@ -86,5 +86,5 @@ class ToypicsUserIE(InfoExtractor): '_type': 'url', 'url': eurl, 'ie_key': 'Toypics', - } for eurl in urls] + } for eurl in urls], } diff --git a/yt_dlp/extractor/traileraddict.py b/yt_dlp/extractor/traileraddict.py index 5c4a138..81c9365 100644 --- a/yt_dlp/extractor/traileraddict.py +++ b/yt_dlp/extractor/traileraddict.py @@ -14,7 +14,7 @@ class TrailerAddictIE(InfoExtractor): 'ext': 'mp4', 'title': 'Prince Avalanche Trailer', 'description': 'Trailer for Prince Avalanche.\n\nTwo highway road workers spend the summer of 1988 away from their city lives. The isolated landscape becomes a place of misadventure as the men find themselves at odds with each other and the women they left behind.', - } + }, } def _real_extract(self, url): @@ -39,7 +39,7 @@ class TrailerAddictIE(InfoExtractor): else: fvar = 'fvar' - info_url = 'http://www.traileraddict.com/%s.php?tid=%s' % (fvar, str(video_id)) + info_url = f'http://www.traileraddict.com/{fvar}.php?tid={video_id!s}' info_webpage = self._download_webpage(info_url, video_id, 'Downloading the info webpage') final_url = self._search_regex(r'&fileurl=(.+)', diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index 545a672..7d800ae 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -63,7 +63,7 @@ class TrovoIE(TrovoBaseIE): 'title': '💥IGRAMO IGRICE UPADAJTE💥2500/5000 2022-06-28 22:01', 'live_status': 'is_live', }, - 'skip': 'May not be live' + 'skip': 'May not be live', }] def _real_extract(self, url): @@ -77,7 +77,7 @@ class TrovoIE(TrovoBaseIE): }, }) if live_info.get('isLive') == 0: - raise ExtractorError('%s is offline' % username, expected=True) + raise 
ExtractorError(f'{username} is offline', expected=True) program_info = live_info['programInfo'] program_id = program_info['id'] title = program_info['title'] diff --git a/yt_dlp/extractor/trtcocuk.py b/yt_dlp/extractor/trtcocuk.py index f27f5a1..0c899f1 100644 --- a/yt_dlp/extractor/trtcocuk.py +++ b/yt_dlp/extractor/trtcocuk.py @@ -15,7 +15,7 @@ class TrtCocukVideoIE(InfoExtractor): 'title': 'Kaptan Pengu ve Arkadaşları 1 Bölüm İzle TRT Çocuk', 'release_date': '20201209', 'release_timestamp': 1607513774, - } + }, }, { 'url': 'https://www.trtcocuk.net.tr/video/sef-rokanin-lezzet-dunyasi-17', 'info_dict': { @@ -23,7 +23,7 @@ class TrtCocukVideoIE(InfoExtractor): 'ext': 'mp4', 'series': '"Şef Roka\'nın Lezzet Dünyası"', 'title': 'Şef Roka\'nın Lezzet Dünyası 17 Bölüm İzle TRT Çocuk', - } + }, }] def _real_extract(self, url): @@ -44,5 +44,5 @@ class TrtCocukVideoIE(InfoExtractor): 'season_number': int_or_none(nuxtjs_data.get('season')), 'release_timestamp': parse_iso8601(nuxtjs_data.get('publishedDate')), 'series': traverse_obj(nuxtjs_data, ('show', 0, 'title')), - 'title': self._html_extract_title(webpage) # TODO: get better title + 'title': self._html_extract_title(webpage), # TODO: get better title } diff --git a/yt_dlp/extractor/trtworld.py b/yt_dlp/extractor/trtworld.py index dbb72a4..d33b9aa 100644 --- a/yt_dlp/extractor/trtworld.py +++ b/yt_dlp/extractor/trtworld.py @@ -16,7 +16,7 @@ class TrtWorldIE(InfoExtractor): 'release_date': '20231202', 'thumbnail': 'https://cdn-i.pr.trt.com.tr/trtworld/17647563_0-0-1920-1080.jpeg', 'description': 'md5:0a975c04257fb529c8f99c7b76a2cf12', - } + }, }, { 'url': 'https://www.trtworld.com/video/one-offs/frames-from-anatolia-recreating-a-james-bond-scene-in-istanbuls-grand-bazaar-14541780', 'info_dict': { @@ -27,7 +27,7 @@ class TrtWorldIE(InfoExtractor): 'release_date': '20230819', 'thumbnail': 'https://cdn-i.pr.trt.com.tr/trtworld/16939810_0-0-1920-1080.jpeg', 'description': 'md5:4050e21570cc3c40b6c9badae800a94f', - } + }, }, 
{ 'url': 'https://www.trtworld.com/video/the-newsmakers/can-sudan-find-peace-amidst-failed-transition-to-democracy-12904760', 'info_dict': { @@ -36,8 +36,8 @@ class TrtWorldIE(InfoExtractor): 'title': 'Can Sudan find peace amidst failed transition to democracy?', 'release_timestamp': 1681972747, 'release_date': '20230420', - 'thumbnail': 'http://cdni0.trtworld.com/w768/q70/154214_NMYOUTUBETEMPLATE1_1681833018736.jpg' - } + 'thumbnail': 'http://cdni0.trtworld.com/w768/q70/154214_NMYOUTUBETEMPLATE1_1681833018736.jpg', + }, }, { 'url': 'https://www.trtworld.com/video/africa-matters/locals-learning-to-cope-with-rising-tides-of-kenyas-great-lakes-16059545', 'info_dict': { @@ -66,7 +66,7 @@ class TrtWorldIE(InfoExtractor): 'tags': [], 'live_status': 'not_live', 'like_count': int, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/trueid.py b/yt_dlp/extractor/trueid.py index efedac1..183364b 100644 --- a/yt_dlp/extractor/trueid.py +++ b/yt_dlp/extractor/trueid.py @@ -31,7 +31,7 @@ class TrueIDIE(InfoExtractor): 'upload_date': '20200916', 'release_date': '20200630', }, - 'expected_warnings': ['Video is geo restricted.'] + 'expected_warnings': ['Video is geo restricted.'], }, { 'url': 'https://trueid.id/series/zZOBVPb62EwR/qXY73rwyl7oj/one-piece-ep-1/', 'md5': '1c6d976049bc3c89a8a25aed2c3fb081', @@ -51,7 +51,7 @@ class TrueIDIE(InfoExtractor): 'upload_date': '20210112', 'release_date': '20210131', }, - 'expected_warnings': ['Video is geo restricted.'] + 'expected_warnings': ['Video is geo restricted.'], }, { 'url': 'https://vn.trueid.net/series/7DNPM7Bpa9wv/pwLgEQ4Xbda2/haikyu-vua-bong-chuyen-phan-1/', 'info_dict': { @@ -69,7 +69,7 @@ class TrueIDIE(InfoExtractor): 'upload_date': '20210818', 'release_date': '20210818', }, - 'expected_warnings': ['Video is geo restricted.'] + 'expected_warnings': ['Video is geo restricted.'], }, { 'url': 'https://trueid.ph/series/l8rvvAw7Jwv8/l8rvvAw7Jwv8/naruto-trailer/', 'only_matching': True, diff --git 
a/yt_dlp/extractor/trutv.py b/yt_dlp/extractor/trutv.py index ea0f2f4..cbfe67a 100644 --- a/yt_dlp/extractor/trutv.py +++ b/yt_dlp/extractor/trutv.py @@ -32,7 +32,7 @@ class TruTVIE(TurnerBaseIE): display_id = clip_slug data = self._download_json( - 'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id), + f'https://api.trutv.com/v2/web/{path}/{series_slug}/{display_id}', display_id) video_data = data['episode'] if video_id else data['info'] media_id = video_data['mediaId'] diff --git a/yt_dlp/extractor/tube8.py b/yt_dlp/extractor/tube8.py index 5f15b45..7267bf2 100644 --- a/yt_dlp/extractor/tube8.py +++ b/yt_dlp/extractor/tube8.py @@ -1,8 +1,8 @@ import re +import urllib.parse from .common import InfoExtractor from ..aes import aes_decrypt_text -from ..compat import compat_urllib_parse_unquote from ..utils import ( determine_ext, format_field, @@ -100,12 +100,12 @@ class Tube8IE(InfoExtractor): r'flashvars\.video_url\s*=\s*(["\'])(?P<url>http.+?)\1', webpage, 'video url', default=None, group='url') if video_url: - extract_format(compat_urllib_parse_unquote(video_url)) + extract_format(urllib.parse.unquote(video_url)) if not formats: if 'title="This video is no longer available"' in webpage: self.raise_no_formats( - 'Video %s is no longer available' % video_id, expected=True) + f'Video {video_id} is no longer available', expected=True) if not title: title = self._html_search_regex( @@ -153,8 +153,8 @@ class Tube8IE(InfoExtractor): tags_str = self._search_regex( r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)', webpage, 'tags', fatal=False) - tags = [t for t in re.findall( - r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None + tags = list(re.findall( + r'<a[^>]+href=[^>]+>([^<]+)', tags_str)) if tags_str else None info.update({ 'description': description, diff --git a/yt_dlp/extractor/tubetugraz.py b/yt_dlp/extractor/tubetugraz.py index a351e4e..d5dbf00 100644 --- a/yt_dlp/extractor/tubetugraz.py +++ b/yt_dlp/extractor/tubetugraz.py @@ -21,16 
+21,20 @@ class TubeTuGrazBaseIE(InfoExtractor): if not urlh: return - content, urlh = self._download_webpage_handle( + response = self._download_webpage_handle( urlh.url, None, fatal=False, headers={'referer': urlh.url}, note='logging in', errnote='unable to log in', data=urlencode_postdata({ 'lang': 'de', '_eventId_proceed': '', 'j_username': username, - 'j_password': password + 'j_password': password, })) - if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html': + if not response: + return + + content, urlh = response + if urlh.url == 'https://tube.tugraz.at/paella/ui/index.html': return if not self._html_search_regex( @@ -39,7 +43,7 @@ class TubeTuGrazBaseIE(InfoExtractor): self.report_warning('unable to login: incorrect password') return - content, urlh = self._download_webpage_handle( + urlh = self._request_webpage( urlh.url, None, fatal=False, headers={'referer': urlh.url}, note='logging in with TFA', errnote='unable to log in with TFA', data=urlencode_postdata({ @@ -53,33 +57,33 @@ class TubeTuGrazBaseIE(InfoExtractor): self.report_warning('unable to login: incorrect TFA code') def _extract_episode(self, episode_info): - id = episode_info.get('id') + video_id = episode_info.get('id') formats = list(self._extract_formats( - traverse_obj(episode_info, ('mediapackage', 'media', 'track')), id)) + traverse_obj(episode_info, ('mediapackage', 'media', 'track')), video_id)) title = traverse_obj(episode_info, ('mediapackage', 'title'), 'dcTitle') series_title = traverse_obj(episode_info, ('mediapackage', 'seriestitle')) creator = ', '.join(variadic(traverse_obj( episode_info, ('mediapackage', 'creators', 'creator'), 'dcCreator', default=''))) return { - 'id': id, + 'id': video_id, 'title': title, 'creator': creator or None, 'duration': traverse_obj(episode_info, ('mediapackage', 'duration'), 'dcExtent'), 'series': series_title, 'series_id': traverse_obj(episode_info, ('mediapackage', 'series'), 'dcIsPartOf'), 'episode': series_title and title, - 
'formats': formats + 'formats': formats, } - def _set_format_type(self, formats, type): + def _set_format_type(self, formats, fmt_type): for f in formats: - f['format_note'] = type - if not type.startswith(self._FORMAT_TYPES[0]): + f['format_note'] = fmt_type + if not fmt_type.startswith(self._FORMAT_TYPES[0]): f['preference'] = -2 return formats - def _extract_formats(self, format_list, id): + def _extract_formats(self, format_list, video_id): has_hls, has_dash = False, False for format_info in format_list or []: @@ -87,7 +91,7 @@ class TubeTuGrazBaseIE(InfoExtractor): if url is None: continue - type = format_info.get('type') or 'unknown' + fmt_type = format_info.get('type') or 'unknown' transport = (format_info.get('transport') or 'https').lower() if transport == 'https': @@ -100,10 +104,10 @@ class TubeTuGrazBaseIE(InfoExtractor): }] elif transport == 'hls': has_hls, formats = True, self._extract_m3u8_formats( - url, id, 'mp4', fatal=False, note=f'downloading {type} HLS manifest') + url, video_id, 'mp4', fatal=False, note=f'downloading {fmt_type} HLS manifest') elif transport == 'dash': has_dash, formats = True, self._extract_mpd_formats( - url, id, fatal=False, note=f'downloading {type} DASH manifest') + url, video_id, fatal=False, note=f'downloading {fmt_type} DASH manifest') else: # RTMP, HDS, SMOOTH, and unknown formats # - RTMP url fails on every tested entry until now @@ -111,21 +115,21 @@ class TubeTuGrazBaseIE(InfoExtractor): # - SMOOTH url 404's on every tested entry until now continue - yield from self._set_format_type(formats, type) + yield from self._set_format_type(formats, fmt_type) # TODO: Add test for these - for type in self._FORMAT_TYPES: + for fmt_type in self._FORMAT_TYPES: if not has_hls: hls_formats = self._extract_m3u8_formats( - f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/playlist.m3u8', - id, 'mp4', fatal=False, note=f'Downloading {type} HLS manifest', errnote=False) or [] - yield from 
self._set_format_type(hls_formats, type) + f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fmt_type}.smil/playlist.m3u8', + video_id, 'mp4', fatal=False, note=f'Downloading {fmt_type} HLS manifest', errnote=False) or [] + yield from self._set_format_type(hls_formats, fmt_type) if not has_dash: dash_formats = self._extract_mpd_formats( - f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{id}_{type}.smil/manifest_mpm4sav_mvlist.mpd', - id, fatal=False, note=f'Downloading {type} DASH manifest', errnote=False) - yield from self._set_format_type(dash_formats, type) + f'https://wowza.tugraz.at/matterhorn_engage/smil:engage-player_{video_id}_{fmt_type}.smil/manifest_mpm4sav_mvlist.mpd', + video_id, fatal=False, note=f'Downloading {fmt_type} DASH manifest', errnote=False) + yield from self._set_format_type(dash_formats, fmt_type) class TubeTuGrazIE(TubeTuGrazBaseIE): @@ -148,7 +152,7 @@ class TubeTuGrazIE(TubeTuGrazBaseIE): 'creator': 'Safran C', 'duration': 3295818, 'series_id': 'b1192fff-2aa7-4bf0-a5cf-7b15c3bd3b34', - } + }, }, { 'url': 'https://tube.tugraz.at/paella/ui/watch.html?id=2df6d787-e56a-428d-8ef4-d57f07eef238', 'md5': 'de0d854a56bf7318d2b693fe1adb89a5', @@ -158,7 +162,7 @@ class TubeTuGrazIE(TubeTuGrazBaseIE): 'ext': 'mp4', }, 'expected_warnings': ['Extractor failed to obtain "title"'], - } + }, ] def _real_extract(self, url): @@ -193,7 +197,7 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE): 'series': '[209351] Strassenwesen', 'creator': 'Neuhold R', 'duration': 6127024, - } + }, }, { 'info_dict': { @@ -205,7 +209,7 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE): 'series': '[209351] Strassenwesen', 'creator': 'Neuhold R', 'duration': 5374422, - } + }, }, { 'info_dict': { @@ -217,7 +221,7 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE): 'series': '[209351] Strassenwesen', 'creator': 'Neuhold R', 'duration': 5566404, - } + }, }, { 'info_dict': { @@ -229,24 +233,25 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE): 'series': 
'[209351] Strassenwesen', 'creator': 'Neuhold R', 'duration': 5420200, - } - } + }, + }, ], - 'min_playlist_count': 4 + 'min_playlist_count': 4, }] def _real_extract(self, url): - id = self._match_id(url) - episodes_data = self._download_json(self._API_EPISODE, id, query={'sid': id}, note='Downloading episode list') + playlist_id = self._match_id(url) + episodes_data = self._download_json( + self._API_EPISODE, playlist_id, query={'sid': playlist_id}, note='Downloading episode list') series_data = self._download_json( - 'https://tube.tugraz.at/series/series.json', id, fatal=False, + 'https://tube.tugraz.at/series/series.json', playlist_id, fatal=False, note='downloading series metadata', errnote='failed to download series metadata', query={ - 'seriesId': id, + 'seriesId': playlist_id, 'count': 1, - 'sort': 'TITLE' + 'sort': 'TITLE', }) return self.playlist_result( - map(self._extract_episode, episodes_data['search-results']['result']), id, + map(self._extract_episode, episodes_data['search-results']['result']), playlist_id, traverse_obj(series_data, ('catalogs', 0, 'http://purl.org/dc/terms/', 'title', 0, 'value'))) diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index bd46bc3..85eb3a2 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -7,33 +7,46 @@ from ..utils import ( int_or_none, js_to_json, traverse_obj, + url_or_none, urlencode_postdata, ) class TubiTvIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - tubitv:| - https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/ - ) - (?P<id>[0-9]+)''' + IE_NAME = 'tubitv' + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)' _LOGIN_URL = 'http://tubitv.com/login' _NETRC_MACHINE = 'tubitv' - _GEO_COUNTRIES = ['US'] _TESTS = [{ - 'url': 'https://tubitv.com/movies/383676/tracker', - 'md5': '566fa0f76870302d11af0de89511d3f0', + 'url': 'https://tubitv.com/movies/100004539/the-39-steps', 'info_dict': { - 'id': '383676', + 'id': '100004539', 
'ext': 'mp4', - 'title': 'Tracker', - 'description': 'md5:ff320baf43d0ad2655e538c1d5cd9706', - 'uploader_id': 'f866e2677ea2f0dff719788e4f7f9195', - 'release_year': 2010, + 'title': 'The 39 Steps', + 'description': 'md5:bb2f2dd337f0dc58c06cb509943f54c8', + 'uploader_id': 'abc2558d54505d4f0f32be94f2e7108c', + 'release_year': 1935, 'thumbnail': r're:^https?://.+\.(jpe?g|png)$', - 'duration': 6122, + 'duration': 5187, }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://tubitv.com/tv-shows/554628/s01-e01-rise-of-the-snakes', + 'info_dict': { + 'id': '554628', + 'ext': 'mp4', + 'title': 'S01:E01 - Rise of the Snakes', + 'description': 'md5:ba136f586de53af0372811e783a3f57d', + 'episode': 'Rise of the Snakes', + 'episode_number': 1, + 'season': 'Season 1', + 'season_number': 1, + 'uploader_id': '2a9273e728c510d22aa5c57d0646810b', + 'release_year': 2011, + 'thumbnail': r're:^https?://.+\.(jpe?g|png)$', + 'duration': 1376, + }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', 'md5': '43ac06be9326f41912dc64ccf7a80320', @@ -44,7 +57,7 @@ class TubiTvIE(InfoExtractor): 'description': 'A stand up comedian is forced to look at the decisions in his life while on a one week trip to the west coast.', 'uploader_id': 'bc168bee0d18dd1cb3b86c68706ab434', }, - 'skip': 'Content Unavailable' + 'skip': 'Content Unavailable', }, { 'url': 'http://tubitv.com/tv-shows/321886/s01_e01_on_nom_stories', 'only_matching': True, @@ -58,7 +71,7 @@ class TubiTvIE(InfoExtractor): 'uploader_id': 'd8fed30d4f24fcb22ec294421b9defc2', 'release_year': 1979, }, - 'skip': 'Content Unavailable' + 'skip': 'Content Unavailable', }] # DRM formats are included only to raise appropriate error @@ -81,45 +94,39 @@ class TubiTvIE(InfoExtractor): 'Login failed (invalid username/password)', expected=True) def _real_extract(self, url): - video_id = self._match_id(url) - video_data = 
self._download_json(f'https://tubitv.com/oz/videos/{video_id}/content', video_id, query={ - 'video_resources': ['dash', 'hlsv3', 'hlsv6', *self._UNPLAYABLE_FORMATS], - }) - title = video_data['title'] + video_id, video_type = self._match_valid_url(url).group('id', 'type') + webpage = self._download_webpage(f'https://tubitv.com/{video_type}/{video_id}/', video_id) + video_data = self._search_json( + r'window\.__data\s*=', webpage, 'data', video_id, + transform_source=js_to_json)['video']['byId'][video_id] formats = [] drm_formats = False - for resource in video_data['video_resources']: - if resource['type'] in ('dash', ): - formats += self._extract_mpd_formats(resource['manifest']['url'], video_id, mpd_id=resource['type'], fatal=False) - elif resource['type'] in ('hlsv3', 'hlsv6'): - formats += self._extract_m3u8_formats(resource['manifest']['url'], video_id, 'mp4', m3u8_id=resource['type'], fatal=False) - elif resource['type'] in self._UNPLAYABLE_FORMATS: + for resource in traverse_obj(video_data, ('video_resources', lambda _, v: url_or_none(v['manifest']['url']))): + resource_type = resource.get('type') + manifest_url = resource['manifest']['url'] + if resource_type == 'dash': + formats.extend(self._extract_mpd_formats(manifest_url, video_id, mpd_id=resource_type, fatal=False)) + elif resource_type in ('hlsv3', 'hlsv6'): + formats.extend(self._extract_m3u8_formats(manifest_url, video_id, 'mp4', m3u8_id=resource_type, fatal=False)) + elif resource_type in self._UNPLAYABLE_FORMATS: drm_formats = True + else: + self.report_warning(f'Skipping unknown resource type "{resource_type}"') if not formats and drm_formats: self.report_drm(video_id) elif not formats and not video_data.get('policy_match'): # policy_match is False if content was removed raise ExtractorError('This content is currently unavailable', expected=True) - thumbnails = [] - for thumbnail_url in video_data.get('thumbnails', []): - if not thumbnail_url: - continue - thumbnails.append({ - 'url': 
self._proto_relative_url(thumbnail_url), - }) - subtitles = {} - for sub in video_data.get('subtitles', []): - sub_url = sub.get('url') - if not sub_url: - continue + for sub in traverse_obj(video_data, ('subtitles', lambda _, v: url_or_none(v['url']))): subtitles.setdefault(sub.get('lang', 'English'), []).append({ - 'url': self._proto_relative_url(sub_url), + 'url': self._proto_relative_url(sub['url']), }) + title = traverse_obj(video_data, ('title', {str})) season_number, episode_number, episode_title = self._search_regex( r'^S(\d+):E(\d+) - (.+)', title, 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None)) @@ -128,41 +135,68 @@ class TubiTvIE(InfoExtractor): 'title': title, 'formats': formats, 'subtitles': subtitles, - 'thumbnails': thumbnails, - 'description': video_data.get('description'), - 'duration': int_or_none(video_data.get('duration')), - 'uploader_id': video_data.get('publisher_id'), - 'release_year': int_or_none(video_data.get('year')), 'season_number': int_or_none(season_number), 'episode_number': int_or_none(episode_number), - 'episode_title': episode_title + 'episode': episode_title, + **traverse_obj(video_data, { + 'description': ('description', {str}), + 'duration': ('duration', {int_or_none}), + 'uploader_id': ('publisher_id', {str}), + 'release_year': ('year', {int_or_none}), + 'thumbnails': ('thumbnails', ..., {url_or_none}, {'url': {self._proto_relative_url}}), + }), } class TubiTvShowIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P<show_name>[^/?#]+)' + IE_NAME = 'tubitv:series' + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/\d+/(?P<show_name>[^/?#]+)(?:/season-(?P<season>\d+))?' 
_TESTS = [{ 'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true', - 'playlist_mincount': 390, + 'playlist_mincount': 389, 'info_dict': { 'id': 'the-joy-of-painting-with-bob-ross', - } + }, + }, { + 'url': 'https://tubitv.com/series/2311/the-saddle-club/season-1', + 'playlist_count': 26, + 'info_dict': { + 'id': 'the-saddle-club-season-1', + }, + }, { + 'url': 'https://tubitv.com/series/2311/the-saddle-club/season-3', + 'playlist_count': 19, + 'info_dict': { + 'id': 'the-saddle-club-season-3', + }, + }, { + 'url': 'https://tubitv.com/series/2311/the-saddle-club/', + 'playlist_mincount': 71, + 'info_dict': { + 'id': 'the-saddle-club', + }, }] - def _entries(self, show_url, show_name): - show_webpage = self._download_webpage(show_url, show_name) + def _entries(self, show_url, playlist_id, selected_season): + webpage = self._download_webpage(show_url, playlist_id) + + data = self._search_json( + r'window\.__data\s*=', webpage, 'data', playlist_id, + transform_source=js_to_json)['video'] - show_json = self._parse_json(self._search_regex( - r'window\.__data\s*=\s*({[^<]+});\s*</script>', - show_webpage, 'data'), show_name, transform_source=js_to_json)['video'] + # v['number'] is already a decimal string, but stringify to protect against API changes + path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}] - for episode_id in show_json['fullContentById'].keys(): - if traverse_obj(show_json, ('byId', episode_id, 'type')) == 's': - continue - yield self.url_result( - 'tubitv:%s' % episode_id, - ie=TubiTvIE.ie_key(), video_id=episode_id) + for season in traverse_obj(data, ('byId', lambda _, v: v['type'] == 's', 'seasons', *path)): + season_number = int_or_none(season.get('number')) + for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])): + episode_id = episode['id'] + yield self.url_result( + f'https://tubitv.com/tv-shows/{episode_id}/', TubiTvIE, episode_id, + 
season_number=season_number, episode_number=int_or_none(episode.get('num'))) def _real_extract(self, url): - show_name = self._match_valid_url(url).group('show_name') - return self.playlist_result(self._entries(url, show_name), playlist_id=show_name) + playlist_id, selected_season = self._match_valid_url(url).group('show_name', 'season') + if selected_season: + playlist_id = f'{playlist_id}-season-{selected_season}' + return self.playlist_result(self._entries(url, playlist_id, selected_season), playlist_id) diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py index f2d0c59..7f851bf 100644 --- a/yt_dlp/extractor/tumblr.py +++ b/yt_dlp/extractor/tumblr.py @@ -28,7 +28,7 @@ class TumblrIE(InfoExtractor): 'repost_count': int, 'age_limit': 0, 'tags': ['Orphan Black', 'Tatiana Maslany', 'Interview', 'Video', 'OB S1 DVD Extras'], - } + }, }, { 'note': 'multiple formats', 'url': 'https://maskofthedragon.tumblr.com/post/626907179849564160/mona-talking-in-english', @@ -65,7 +65,7 @@ class TumblrIE(InfoExtractor): 'repost_count': int, 'age_limit': 0, 'tags': [], - } + }, }, { 'note': 'dashboard only (original post)', 'url': 'https://jujanon.tumblr.com/post/159704441298/my-baby-eating', @@ -82,7 +82,7 @@ class TumblrIE(InfoExtractor): 'repost_count': int, 'age_limit': 0, 'tags': ['crabs', 'my video', 'my pets'], - } + }, }, { 'note': 'dashboard only (reblog)', 'url': 'https://bartlebyshop.tumblr.com/post/180294460076/duality-of-bird', @@ -99,7 +99,7 @@ class TumblrIE(InfoExtractor): 'repost_count': int, 'age_limit': 0, 'tags': [], - } + }, }, { 'note': 'dashboard only (external)', 'url': 'https://afloweroutofstone.tumblr.com/post/675661759168823296/the-blues-remembers-everything-the-country-forgot', @@ -149,7 +149,7 @@ class TumblrIE(InfoExtractor): 'uploader': 'naked-yogi', }, # 'add_ie': ['Vidme'], - 'skip': 'dead embedded video host' + 'skip': 'dead embedded video host', }, { 'url': 
'https://prozdvoices.tumblr.com/post/673201091169681408/what-recording-voice-acting-sounds-like', 'md5': 'a0063fc8110e6c9afe44065b4ea68177', @@ -363,8 +363,8 @@ class TumblrIE(InfoExtractor): # if it's a reblog, og:description will be the reblogger's comment, not the uploader's. # content_json is always the op, so if it exists but has no text, there's no description if content_json: - description = '\n\n'.join(( - item.get('text') for item in content_json if item.get('type') == 'text')) or None + description = '\n\n'.join( + item.get('text') for item in content_json if item.get('type') == 'text') or None else: description = self._og_search_description(webpage, default=None) uploader_id = traverse_obj(post_json, 'reblogged_root_name', 'blog_name') diff --git a/yt_dlp/extractor/tunein.py b/yt_dlp/extractor/tunein.py index fd2fe13..05bb2a9 100644 --- a/yt_dlp/extractor/tunein.py +++ b/yt_dlp/extractor/tunein.py @@ -117,7 +117,7 @@ class TuneInPodcastIE(TuneInBaseIE): 'playlist_mincount': 200, }, { 'url': 'https://tunein.com/embed/player/p191660/', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://tunein.com/podcasts/World-News/BBC-News-p14/', 'info_dict': { @@ -230,5 +230,5 @@ class TuneInShortenerIE(InfoExtractor): if url_parsed.port == 443: url = url_parsed._replace(netloc=url_parsed.hostname).url - self.to_screen('Following redirect: %s' % url) + self.to_screen(f'Following redirect: {url}') return self.url_result(url) diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index b27db87..8b79a8b 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -1,7 +1,6 @@ import re from .adobepass import AdobePassIE -from ..compat import compat_str from ..utils import ( ExtractorError, determine_ext, @@ -79,7 +78,7 @@ class TurnerBaseIE(AdobePassIE): ext = determine_ext(video_url) if video_url.startswith('/mp4:protected/'): continue - # TODO Correct extraction for these files + # TODO: Correct extraction for these files # 
protected_path_data = path_data.get('protected') # if not protected_path_data or not rtmp_src: # continue @@ -160,7 +159,7 @@ class TurnerBaseIE(AdobePassIE): 'height': int(mobj.group('height')), 'tbr': int_or_none(mobj.group('bitrate')), }) - elif isinstance(format_id, compat_str): + elif isinstance(format_id, str): if format_id.isdigit(): f['tbr'] = int(format_id) else: @@ -187,7 +186,7 @@ class TurnerBaseIE(AdobePassIE): 'scc': 'scc', 'webvtt': 'vtt', 'smptett': 'tt', - }.get(source.get('format')) + }.get(source.get('format')), }) thumbnails.extend({ @@ -219,7 +218,7 @@ class TurnerBaseIE(AdobePassIE): def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None): is_live = ap_data.get('is_live') streams_data = self._download_json( - 'http://medium.ngtv.io/media/%s/tv' % media_id, + f'http://medium.ngtv.io/media/{media_id}/tv', media_id)['media']['tv'] duration = None chapters = [] diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index 9b19e79..601ecab 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -52,10 +52,10 @@ class TV2IE(InfoExtractor): format_urls = [] for protocol in self._PROTOCOLS: try: - data = self._download_json('https://api.sumo.tv2.no/play/%s?stream=%s' % (video_id, protocol), + data = self._download_json(f'https://api.sumo.tv2.no/play/{video_id}?stream={protocol}', video_id, 'Downloading playabck JSON', headers={'content-type': 'application/json'}, - data='{"device":{"id":"1-1-1","name":"Nettleser (HTML)"}}'.encode())['playback'] + data=b'{"device":{"id":"1-1-1","name":"Nettleser (HTML)"}}')['playback'] except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 401: error = self._parse_json(e.cause.response.read().decode(), video_id)['error'] @@ -71,7 +71,7 @@ class TV2IE(InfoExtractor): video_url = item.get('url') if not video_url or video_url in format_urls: continue - format_id = '%s-%s' % (protocol.lower(), item.get('type')) + format_id = '{}-{}'.format(protocol.lower(), 
item.get('type')) if not self._is_valid_url(video_url, video_id, format_id): continue format_urls.append(video_url) @@ -97,9 +97,9 @@ class TV2IE(InfoExtractor): self.report_drm(video_id) thumbnails = [{ - 'id': type, + 'id': thumb_type, 'url': thumb_url, - } for type, thumb_url in (asset.get('images') or {}).items()] + } for thumb_type, thumb_url in (asset.get('images') or {}).items()] return { 'id': video_id, @@ -151,7 +151,7 @@ class TV2ArticleIE(InfoExtractor): assets.append(asset) entries = [ - self.url_result('http://www.tv2.no/v/%s' % asset_id, 'TV2') + self.url_result(f'http://www.tv2.no/v/{asset_id}', 'TV2') for asset_id in assets] title = remove_end(self._og_search_title(webpage), ' - TV2.no') @@ -196,7 +196,7 @@ class KatsomoIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id) + api_base = f'http://{self._API_DOMAIN}/api/web/asset/{video_id}' asset = self._download_json( api_base + '.json', video_id, @@ -209,7 +209,7 @@ class KatsomoIE(InfoExtractor): for protocol in self._PROTOCOLS: try: data = self._download_json( - api_base + '/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % protocol, + api_base + f'/play.json?protocol={protocol}&videoFormat=SMIL+ISMUSP', video_id, 'Downloading play JSON')['playback'] except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 401: @@ -232,7 +232,7 @@ class KatsomoIE(InfoExtractor): video_url = item.get('url') if not video_url or video_url in format_urls: continue - format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat')) + format_id = '{}-{}'.format(protocol.lower(), item.get('mediaFormat')) if not self._is_valid_url(video_url, video_id, format_id): continue format_urls.append(video_url) diff --git a/yt_dlp/extractor/tv24ua.py b/yt_dlp/extractor/tv24ua.py index 89905ac..2787de4 100644 --- a/yt_dlp/extractor/tv24ua.py +++ b/yt_dlp/extractor/tv24ua.py @@ -15,7 +15,7 @@ class 
TV24UAVideoIE(InfoExtractor): 'ext': 'mp4', 'title': 'У Харкові ворожа ракета прилетіла в будинок, де слухали пісні про "офіцерів-росіян"', 'thumbnail': r're:^https?://.*\.jpe?g', - } + }, }, { 'url': 'https://24tv.ua/news/showPlayer.do?videoUrl=2022/07/2074790&objectId=2074790&w=640&h=360', 'only_matching': True, @@ -32,7 +32,7 @@ class TV24UAVideoIE(InfoExtractor): 'ext': 'mp4', 'title': 'Росіяни руйнують Бородянку на Київщині та стріляють з літаків по мешканцях: шокуючі фото', 'thumbnail': r're:^https?://.*\.jpe?g', - } + }, }, { 'url': 'https://24tv.ua/vipalyuyut-nashi-mista-sela-dsns-pokazali-motoroshni-naslidki_n1883966', @@ -43,7 +43,7 @@ class TV24UAVideoIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpe?g', }, 'params': {'allowed_extractors': ['Generic', '24tv.ua']}, - } + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/tv2dk.py b/yt_dlp/extractor/tv2dk.py index 35e92f1..9cd7606 100644 --- a/yt_dlp/extractor/tv2dk.py +++ b/yt_dlp/extractor/tv2dk.py @@ -82,7 +82,7 @@ class TV2DKIE(InfoExtractor): def add_entry(partner_id, kaltura_id): entries.append(self.url_result( - 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura', + f'kaltura:{partner_id}:{kaltura_id}', 'Kaltura', video_id=kaltura_id)) for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage): diff --git a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py index cd35ff5..3fd41a3 100644 --- a/yt_dlp/extractor/tv2hu.py +++ b/yt_dlp/extractor/tv2hu.py @@ -1,4 +1,3 @@ -# encoding: utf-8 from .common import InfoExtractor from ..utils import ( UnsupportedError, @@ -44,14 +43,14 @@ class TV2HuIE(InfoExtractor): }] def _real_extract(self, url): - id = self._match_id(url) - json_data = self._download_json(f'https://tv2play.hu/api/search/{id}', id) + video_id = self._match_id(url) + json_data = self._download_json(f'https://tv2play.hu/api/search/{video_id}', video_id) if json_data['contentType'] == 'showpage': ribbon_ids = traverse_obj(json_data, ('pages', ..., 'tabs', 
..., 'ribbonIds'), get_all=False, expected_type=list) entries = [self.url_result(f'https://tv2play.hu/szalag/{ribbon_id}', ie=TV2HuSeriesIE.ie_key(), video_id=ribbon_id) for ribbon_id in ribbon_ids] - return self.playlist_result(entries, playlist_id=id) + return self.playlist_result(entries, playlist_id=video_id) elif json_data['contentType'] != 'video': raise UnsupportedError(url) @@ -88,17 +87,17 @@ class TV2HuSeriesIE(InfoExtractor): 'playlist_mincount': 284, 'info_dict': { 'id': '59', - } + }, }] def _real_extract(self, url): - id = self._match_id(url) - json_data = self._download_json(f'https://tv2play.hu/api/ribbons/{id}/0?size=100000', id) + playlist_id = self._match_id(url) + json_data = self._download_json(f'https://tv2play.hu/api/ribbons/{playlist_id}/0?size=100000', playlist_id) entries = [] for card in json_data.get('cards', []): video_id = card.get('slug') if video_id: - entries.append(self.url_result(f'https://tv2play.hu/{video_id}', - ie=TV2HuIE.ie_key(), video_id=video_id)) + entries.append(self.url_result( + f'https://tv2play.hu/{video_id}', TV2HuIE, video_id)) - return self.playlist_result(entries, playlist_id=id) + return self.playlist_result(entries, playlist_id=playlist_id) diff --git a/yt_dlp/extractor/tv4.py b/yt_dlp/extractor/tv4.py index 10a2fe6..73a446e 100644 --- a/yt_dlp/extractor/tv4.py +++ b/yt_dlp/extractor/tv4.py @@ -76,7 +76,7 @@ class TV4IE(InfoExtractor): { 'url': 'https://www.tv4play.se/program/nyheterna/avsnitt/13315940', 'only_matching': True, - } + }, ] def _call_api(self, endpoint, video_id, headers=None, query={}): diff --git a/yt_dlp/extractor/tv5unis.py b/yt_dlp/extractor/tv5unis.py index 978255b..88fd334 100644 --- a/yt_dlp/extractor/tv5unis.py +++ b/yt_dlp/extractor/tv5unis.py @@ -32,7 +32,7 @@ class TV5UnisBaseIE(InfoExtractor): } } } -}''' % (self._GQL_QUERY_NAME, self._gql_args(groups)), +}''' % (self._GQL_QUERY_NAME, self._gql_args(groups)), # noqa: UP031 })['data'][self._GQL_QUERY_NAME] media_id = 
product['videoElement']['mediaId'] @@ -61,13 +61,13 @@ class TV5UnisVideoIE(TV5UnisBaseIE): 'ext': 'mp4', 'title': 'Watatatow', 'duration': 10.01, - } + }, } _GQL_QUERY_NAME = 'productById' @staticmethod def _gql_args(groups): - return 'id: %s' % groups + return f'id: {groups}' class TV5UnisIE(TV5UnisBaseIE): @@ -80,7 +80,7 @@ class TV5UnisIE(TV5UnisBaseIE): 'id': 'e5ee23a586c44612a56aad61accf16ef', 'ext': 'mp4', 'title': 'Je ne peux pas lui résister', - 'description': "Atys, le nouveau concierge de l'école, a réussi à ébranler la confiance de Mado en affirmant qu\'une médaille, ce n'est que du métal. Comme Mado essaie de lui prouver que ses valeurs sont solides, il veut la mettre à l'épreuve...", + 'description': "Atys, le nouveau concierge de l'école, a réussi à ébranler la confiance de Mado en affirmant qu'une médaille, ce n'est que du métal. Comme Mado essaie de lui prouver que ses valeurs sont solides, il veut la mettre à l'épreuve...", 'subtitles': { 'fr': 'count:1', }, @@ -110,7 +110,7 @@ class TV5UnisIE(TV5UnisBaseIE): @staticmethod def _gql_args(groups): - args = 'rootProductSlug: "%s"' % groups[0] + args = f'rootProductSlug: "{groups[0]}"' if groups[1]: - args += ', seasonNumber: %s, episodeNumber: %s' % groups[1:] + args += ', seasonNumber: {}, episodeNumber: {}'.format(*groups[1:]) return args diff --git a/yt_dlp/extractor/tvanouvelles.py b/yt_dlp/extractor/tvanouvelles.py index dbebda4..855c771 100644 --- a/yt_dlp/extractor/tvanouvelles.py +++ b/yt_dlp/extractor/tvanouvelles.py @@ -42,7 +42,7 @@ class TVANouvellesArticleIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if TVANouvellesIE.suitable(url) else super(TVANouvellesArticleIE, cls).suitable(url) + return False if TVANouvellesIE.suitable(url) else super().suitable(url) def _real_extract(self, url): display_id = self._match_id(url) @@ -51,7 +51,7 @@ class TVANouvellesArticleIE(InfoExtractor): entries = [ self.url_result( - 'http://www.tvanouvelles.ca/videos/%s' % 
mobj.group('id'), + 'http://www.tvanouvelles.ca/videos/{}'.format(mobj.group('id')), ie=TVANouvellesIE.ie_key(), video_id=mobj.group('id')) for mobj in re.finditer( r'data-video-id=(["\'])?(?P<id>\d+)', webpage)] diff --git a/yt_dlp/extractor/tvc.py b/yt_dlp/extractor/tvc.py index caa76ab..7c8c922 100644 --- a/yt_dlp/extractor/tvc.py +++ b/yt_dlp/extractor/tvc.py @@ -24,7 +24,7 @@ class TVCIE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'http://www.tvc.ru/video/json/id/%s' % video_id, video_id) + f'http://www.tvc.ru/video/json/id/{video_id}', video_id) formats = [] for info in video.get('path', {}).get('quality', []): diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index 5f78968..8105db4 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -60,7 +60,7 @@ class TVerIE(InfoExtractor): 'platform_uid': self._PLATFORM_UID, 'platform_token': self._PLATFORM_TOKEN, }, headers={ - 'x-tver-platform-type': 'web' + 'x-tver-platform-type': 'web', }) episode_content = traverse_obj( episode_info, ('result', 'episode', 'content')) or {} diff --git a/yt_dlp/extractor/tvigle.py b/yt_dlp/extractor/tvigle.py index 6c98219..6b87eca 100644 --- a/yt_dlp/extractor/tvigle.py +++ b/yt_dlp/extractor/tvigle.py @@ -46,7 +46,7 @@ class TvigleIE(InfoExtractor): }, { 'url': 'https://cloud.tvigle.ru/video/5267604/', 'only_matching': True, - } + }, ] def _real_extract(self, url): @@ -63,7 +63,7 @@ class TvigleIE(InfoExtractor): webpage, 'video id') video_data = self._download_json( - 'http://cloud.tvigle.ru/api/play/video/%s/' % video_id, display_id) + f'http://cloud.tvigle.ru/api/play/video/{video_id}/', display_id) item = video_data['playlist']['items'][0] @@ -76,7 +76,7 @@ class TvigleIE(InfoExtractor): msg=error_message, countries=self._GEO_COUNTRIES) else: raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error_message), + f'{self.IE_NAME} returned error: {error_message}', expected=True) title = item['title'] 
@@ -115,7 +115,7 @@ class TvigleIE(InfoExtractor): item, lambda x: x['video_files_size'][vcodec][format_id])) formats.append({ 'url': video_url, - 'format_id': '%s-%s' % (vcodec, format_id), + 'format_id': f'{vcodec}-{format_id}', 'vcodec': vcodec, 'height': int_or_none(height), 'filesize': filesize, diff --git a/yt_dlp/extractor/tviplayer.py b/yt_dlp/extractor/tviplayer.py index 7e9b04d..17f8535 100644 --- a/yt_dlp/extractor/tviplayer.py +++ b/yt_dlp/extractor/tviplayer.py @@ -14,7 +14,7 @@ class TVIPlayerIE(InfoExtractor): 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/61c8ee630cf2cc58e7d98d9f/', 'season_number': 8, 'season': 'Season 8', - } + }, }, { 'url': 'https://tviplayer.iol.pt/programa/isabel/62b471090cf26256cd2a8594/video/62be445f0cf2ea4f0a5218e5', 'info_dict': { @@ -25,7 +25,7 @@ class TVIPlayerIE(InfoExtractor): 'title': 'Isabel - Episódio 1', 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62beac200cf2f9a86eab856b/', 'season_number': 1, - } + }, }, { # no /programa/ 'url': 'https://tviplayer.iol.pt/video/62c4131c0cf2f9a86eac06bb', @@ -37,7 +37,7 @@ class TVIPlayerIE(InfoExtractor): 'season': 'Season 2', 'duration': 148, 'season_number': 2, - } + }, }, { # episodio url 'url': 'https://tviplayer.iol.pt/programa/para-sempre/61716c360cf2365a5ed894c4/episodio/t1e187', @@ -49,7 +49,7 @@ class TVIPlayerIE(InfoExtractor): 'thumbnail': 'https://www.iol.pt/multimedia/oratvi/multimedia/imagem/id/62eda30b0cf2ea367d48973b/', 'duration': 1250, 'season_number': 1, - } + }, }] def _real_initialize(self): diff --git a/yt_dlp/extractor/tvn24.py b/yt_dlp/extractor/tvn24.py index ac48058..0dc43a9 100644 --- a/yt_dlp/extractor/tvn24.py +++ b/yt_dlp/extractor/tvn24.py @@ -18,7 +18,7 @@ class TVN24IE(InfoExtractor): 'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"', 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.', 'thumbnail': 're:https?://.*[.]jpeg', - } + }, }, 
{ # different layout 'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html', @@ -58,7 +58,7 @@ class TVN24IE(InfoExtractor): def extract_json(attr, name, default=NO_DEFAULT, fatal=True): return self._parse_json( self._search_regex( - r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage, + rf'\b{attr}=(["\'])(?P<json>(?!\1).+?)\1', webpage, name, group='json', default=default, fatal=fatal) or '{}', display_id, transform_source=unescapeHTML, fatal=fatal) diff --git a/yt_dlp/extractor/tvnoe.py b/yt_dlp/extractor/tvnoe.py index 917c46b..24a8262 100644 --- a/yt_dlp/extractor/tvnoe.py +++ b/yt_dlp/extractor/tvnoe.py @@ -18,7 +18,7 @@ class TVNoeIE(InfoExtractor): 'series': 'Noční univerzita', 'title': 'prof. Tomáš Halík, Th.D. - Návrat náboženství a střet civilizací', 'description': 'md5:f337bae384e1a531a52c55ebc50fff41', - } + }, } def _real_extract(self, url): @@ -40,7 +40,7 @@ class TVNoeIE(InfoExtractor): 'field-name-field-podnazev', webpage)), 'description': clean_html(get_element_by_class( 'field-name-body', webpage)), - 'series': clean_html(get_element_by_class('title', webpage)) + 'series': clean_html(get_element_by_class('title', webpage)), }) return info_dict diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index f1ebf02..da30829 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -98,7 +98,7 @@ class TVPIE(InfoExtractor): 'playlist_mincount': 1800, 'params': { 'skip_download': True, - } + }, }, { # ABC-specific video embeding # moved to https://bajkowakraina.tvp.pl/wideo/50981130,teleranek,51027049,zubr,51116450 @@ -221,7 +221,7 @@ class TVPIE(InfoExtractor): if website_data.get('items_total_count') > website_data.get('items_per_page'): for page in itertools.count(2): page_website_data = self._parse_vue_website_data( - self._download_webpage(url, page_id, note='Downloading page #%d' % page, + self._download_webpage(url, page_id, 
note=f'Downloading page #{page}', query={'page': page}), page_id) if not page_website_data.get('videos') and not page_website_data.get('items'): @@ -290,7 +290,7 @@ class TVPStreamIE(InfoExtractor): def _real_extract(self, url): channel_id = self._match_id(url) - channel_url = self._proto_relative_url('//stream.tvp.pl/?channel_id=%s' % channel_id or 'default') + channel_url = self._proto_relative_url(f'//stream.tvp.pl/?channel_id={channel_id}' or 'default') webpage = self._download_webpage(channel_url, channel_id or 'default', 'Downloading channel webpage') channels = self._search_json( r'window\.__channels\s*=', webpage, 'channel list', channel_id, @@ -300,7 +300,7 @@ class TVPStreamIE(InfoExtractor): return { '_type': 'url_transparent', 'id': channel_id or channel['id'], - 'url': 'tvp:%s' % audition['video_id'], + 'url': 'tvp:{}'.format(audition['video_id']), 'title': audition.get('title'), 'alt_title': channel.get('title'), 'is_live': True, @@ -379,8 +379,7 @@ class TVPEmbedIE(InfoExtractor): )) webpage = self._download_webpage( - ('https://www.tvp.pl/sess/TVPlayer2/api.php?id=%s' - + '&@method=getTvpConfig&@callback=%s') % (video_id, callback), video_id) + f'https://www.tvp.pl/sess/TVPlayer2/api.php?id={video_id}&@method=getTvpConfig&@callback={callback}', video_id) # stripping JSONP padding datastr = webpage[15 + len(callback):-3] @@ -470,7 +469,7 @@ class TVPEmbedIE(InfoExtractor): # vod.tvp.pl if info.get('vortalName') == 'vod': info_dict.update({ - 'title': '%s, %s' % (info.get('title'), info.get('subtitle')), + 'title': '{}, {}'.format(info.get('title'), info.get('subtitle')), 'series': info.get('title'), 'season': info.get('season'), 'episode_number': info.get('episode'), diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index 29185d3..b4a927a 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from 
..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -107,7 +107,7 @@ class TVPlayIE(InfoExtractor): { 'url': 'mtg:418113', 'only_matching': True, - } + }, ] def _real_extract(self, url): @@ -118,13 +118,13 @@ class TVPlayIE(InfoExtractor): if geo_country: self._initialize_geo_bypass({'countries': [geo_country.upper()]}) video = self._download_json( - 'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON') + f'http://playapi.mtgx.tv/v3/videos/{video_id}', video_id, 'Downloading video JSON') title = video['title'] try: streams = self._download_json( - 'http://playapi.mtgx.tv/v3/videos/stream/%s' % video_id, + f'http://playapi.mtgx.tv/v3/videos/stream/{video_id}', video_id, 'Downloading streams JSON') except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 403: @@ -143,7 +143,7 @@ class TVPlayIE(InfoExtractor): formats.extend(self._extract_f4m_formats( update_url_query(video_url, { 'hdcore': '3.5.0', - 'plugin': 'aasp-3.5.0.151.81' + 'plugin': 'aasp-3.5.0.151.81', }), video_id, f4m_id='hds', fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( @@ -184,7 +184,7 @@ class TVPlayIE(InfoExtractor): if sami_path: lang = self._search_regex( r'_([a-z]{2})\.xml', sami_path, 'lang', - default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]) + default=urllib.parse.urlparse(url).netloc.rsplit('.', 1)[-1]) subtitles[lang] = [{ 'url': sami_path, }] @@ -250,7 +250,7 @@ class TVPlayHomeIE(InfoExtractor): 'description': 'md5:c6926e9710f1a126f028fbe121eddb79', 'duration': 2440, }, - 'skip': '404' + 'skip': '404', }, { 'url': 'https://play.tv3.lt/lives/tv6-lt,live-2838694/optibet-a-lygos-rungtynes-marijampoles-suduva--vilniaus-riteriai,programme-3422014', 'only_matching': True, diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py index d43bdc2..7c47bc7 100644 --- a/yt_dlp/extractor/tvplayer.py +++ b/yt_dlp/extractor/tvplayer.py @@ -1,5 +1,4 @@ from 
.common import InfoExtractor -from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -21,7 +20,7 @@ class TVPlayerIE(InfoExtractor): 'params': { # m3u8 download 'skip_download': True, - } + }, } def _real_extract(self, url): @@ -48,7 +47,7 @@ class TVPlayerIE(InfoExtractor): validate = context['validate'] platform = try_get( - context, lambda x: x['platform']['key'], compat_str) or 'firefox' + context, lambda x: x['platform']['key'], str) or 'firefox' try: response = self._download_json( @@ -66,7 +65,7 @@ class TVPlayerIE(InfoExtractor): response = self._parse_json( e.cause.response.read().decode(), resource_id)['tvplayer']['response'] raise ExtractorError( - '%s said: %s' % (self.IE_NAME, response['error']), expected=True) + '{} said: {}'.format(self.IE_NAME, response['error']), expected=True) raise formats = self._extract_m3u8_formats(response['stream'], display_id, 'mp4') diff --git a/yt_dlp/extractor/tweakers.py b/yt_dlp/extractor/tweakers.py index 9249550..77a4136 100644 --- a/yt_dlp/extractor/tweakers.py +++ b/yt_dlp/extractor/tweakers.py @@ -19,13 +19,13 @@ class TweakersIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpe?g$', 'duration': 386, 'uploader_id': 's7JeEm', - } + }, } def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( - 'https://tweakers.net/video/s1playlist/%s/1920/1080/playlist.json' % video_id, + f'https://tweakers.net/video/s1playlist/{video_id}/1920/1080/playlist.json', video_id)['items'][0] title = video_data['title'] diff --git a/yt_dlp/extractor/twentymin.py b/yt_dlp/extractor/twentymin.py index 74f90b0..a08c591 100644 --- a/yt_dlp/extractor/twentymin.py +++ b/yt_dlp/extractor/twentymin.py @@ -47,14 +47,14 @@ class TwentyMinutenIE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'http://api.20min.ch/video/%s/show' % video_id, + f'http://api.20min.ch/video/{video_id}/show', video_id)['content'] title = 
video['title'] formats = [{ 'format_id': format_id, - 'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p), + 'url': f'http://podcast.20min-tv.ch/podcast/20min/{video_id}{p}.mp4', 'quality': quality, } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])] @@ -64,7 +64,7 @@ class TwentyMinutenIE(InfoExtractor): def extract_count(kind): return try_get( video, - lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind])) + lambda x: int_or_none(x['communityobject'][f'thumbs_{kind}'])) like_count = extract_count('up') dislike_count = extract_count('down') diff --git a/yt_dlp/extractor/twentythreevideo.py b/yt_dlp/extractor/twentythreevideo.py index 290c376..af7c006 100644 --- a/yt_dlp/extractor/twentythreevideo.py +++ b/yt_dlp/extractor/twentythreevideo.py @@ -16,7 +16,7 @@ class TwentyThreeVideoIE(InfoExtractor): 'upload_date': '20171221', 'uploader_id': '12258964', 'uploader': 'Rasmus Bysted', - } + }, }, { 'url': 'https://bonnier-publications-danmark.23video.com/v.ihtml/player.html?token=f0dc46476e06e13afd5a1f84a29e31e8&source=embed&photo%5fid=36137620', 'only_matching': True, @@ -24,7 +24,7 @@ class TwentyThreeVideoIE(InfoExtractor): def _real_extract(self, url): domain, query, photo_id = self._match_valid_url(url).groups() - base_url = 'https://%s' % domain + base_url = f'https://{domain}' photo_data = self._download_json( base_url + '/api/photo/list?' 
+ query, photo_id, query={ 'format': 'json', diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 28ea16c..53b4084 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -139,7 +139,7 @@ class TwitCastingIE(InfoExtractor): webpage, 'datetime', None)) stream_server_data = self._download_json( - 'https://twitcasting.tv/streamserver.php?target=%s&mode=client' % uploader_id, video_id, + f'https://twitcasting.tv/streamserver.php?target={uploader_id}&mode=client', video_id, 'Downloading live info', fatal=False) is_live = any(f'data-{x}' in webpage for x in ['is-onlive="true"', 'live-type="live"', 'status="online"']) @@ -189,7 +189,7 @@ class TwitCastingIE(InfoExtractor): for mode, ws_url in streams.items(): formats.append({ 'url': ws_url, - 'format_id': 'ws-%s' % mode, + 'format_id': f'ws-{mode}', 'ext': 'mp4', 'quality': qq(mode), 'source_preference': -10, @@ -244,8 +244,8 @@ class TwitCastingLiveIE(InfoExtractor): def _real_extract(self, url): uploader_id = self._match_id(url) self.to_screen( - 'Downloading live video of user {0}. ' - 'Pass "https://twitcasting.tv/{0}/show" to download the history'.format(uploader_id)) + f'Downloading live video of user {uploader_id}. 
' + f'Pass "https://twitcasting.tv/{uploader_id}/show" to download the history') is_live = traverse_obj(self._download_json( f'https://frontendapi.twitcasting.tv/watch/user/{uploader_id}', @@ -284,10 +284,10 @@ class TwitCastingUserIE(InfoExtractor): }] def _entries(self, uploader_id): - base_url = next_url = 'https://twitcasting.tv/%s/show' % uploader_id + base_url = next_url = f'https://twitcasting.tv/{uploader_id}/show' for page_num in itertools.count(1): webpage = self._download_webpage( - next_url, uploader_id, query={'filter': 'watchable'}, note='Downloading page %d' % page_num) + next_url, uploader_id, query={'filter': 'watchable'}, note=f'Downloading page {page_num}') matches = re.finditer( r'(?s)<a\s+class="tw-movie-thumbnail2"\s+href="(?P<url>/[^/"]+/movie/\d+)"', webpage) for mobj in matches: @@ -303,4 +303,4 @@ class TwitCastingUserIE(InfoExtractor): def _real_extract(self, url): uploader_id = self._match_id(url) return self.playlist_result( - self._entries(uploader_id), uploader_id, '%s - Live History' % uploader_id) + self._entries(uploader_id), uploader_id, f'{uploader_id} - Live History') diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 80cba09..44b19ad 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -3,13 +3,9 @@ import itertools import json import random import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_str, - compat_urllib_parse_urlparse, -) from ..utils import ( ExtractorError, UserNotLive, @@ -64,7 +60,7 @@ class TwitchBaseIE(InfoExtractor): def _perform_login(self, username, password): def fail(message): raise ExtractorError( - 'Unable to login. Twitch said: %s' % message, expected=True) + f'Unable to login. 
Twitch said: {message}', expected=True) def login_step(page, urlh, note, data): form = self._hidden_inputs(page) @@ -156,12 +152,12 @@ class TwitchBaseIE(InfoExtractor): 'persistedQuery': { 'version': 1, 'sha256Hash': self._OPERATION_HASHES[op['operationName']], - } + }, } return self._download_base_gql(video_id, ops, note) def _download_access_token(self, video_id, token_kind, param_name): - method = '%sPlaybackAccessToken' % token_kind + method = f'{token_kind}PlaybackAccessToken' ops = { 'query': '''{ %s( @@ -176,11 +172,11 @@ class TwitchBaseIE(InfoExtractor): value signature } - }''' % (method, param_name, video_id), + }''' % (method, param_name, video_id), # noqa: UP031 } return self._download_base_gql( video_id, ops, - 'Downloading %s access token GraphQL' % token_kind)['data'][method] + f'Downloading {token_kind} access token GraphQL')['data'][method] def _get_thumbnails(self, thumbnail): return [{ @@ -242,8 +238,8 @@ class TwitchVodIE(TwitchBaseIE): { 'start_time': 0, 'end_time': 17208, - 'title': 'League of Legends' - } + 'title': 'League of Legends', + }, ], 'live_status': 'was_live', }, @@ -301,25 +297,25 @@ class TwitchVodIE(TwitchBaseIE): { 'start_time': 0, 'end_time': 573, - 'title': 'League of Legends' + 'title': 'League of Legends', }, { 'start_time': 573, 'end_time': 3922, - 'title': 'Legends of Runeterra' + 'title': 'Legends of Runeterra', }, { 'start_time': 3922, 'end_time': 11643, - 'title': 'Art' - } + 'title': 'Art', + }, ], 'live_status': 'was_live', 'thumbnail': r're:^https?://.*\.jpg$', 'view_count': int, }, 'params': { - 'skip_download': True + 'skip_download': True, }, }, { 'note': 'Storyboards', @@ -338,18 +334,18 @@ class TwitchVodIE(TwitchBaseIE): { 'start_time': 0, 'end_time': 573, - 'title': 'League of Legends' + 'title': 'League of Legends', }, { 'start_time': 573, 'end_time': 3922, - 'title': 'Legends of Runeterra' + 'title': 'Legends of Runeterra', }, { 'start_time': 3922, 'end_time': 11643, - 'title': 'Art' - } + 'title': 'Art', 
+ }, ], 'live_status': 'was_live', 'thumbnail': r're:^https?://.*\.jpg$', @@ -359,8 +355,8 @@ class TwitchVodIE(TwitchBaseIE): }, 'params': { 'format': 'mhtml', - 'skip_download': True - } + 'skip_download': True, + }, }, { 'note': 'VOD with single chapter', 'url': 'https://www.twitch.tv/videos/1536751224', @@ -377,17 +373,17 @@ class TwitchVodIE(TwitchBaseIE): { 'start_time': 0, 'end_time': 8353, - 'title': 'League of Legends' - } + 'title': 'League of Legends', + }, ], 'live_status': 'was_live', 'thumbnail': r're:^https?://.*\.jpg$', 'view_count': int, }, 'params': { - 'skip_download': True + 'skip_download': True, }, - 'expected_warnings': ['Unable to download JSON metadata: HTTP Error 403: Forbidden'] + 'expected_warnings': ['Unable to download JSON metadata: HTTP Error 403: Forbidden'], }, { 'url': 'https://www.twitch.tv/tangotek/schedule?vodID=1822395420', 'only_matching': True, @@ -488,7 +484,7 @@ class TwitchVodIE(TwitchBaseIE): vod_id = info.get('id') or item_id # id backward compatibility for download archives if vod_id[0] != 'v': - vod_id = 'v%s' % vod_id + vod_id = f'v{vod_id}' thumbnail = url_or_none(info.get('previewThumbnailURL')) is_live = None if thumbnail: @@ -503,8 +499,8 @@ class TwitchVodIE(TwitchBaseIE): 'description': info.get('description'), 'duration': int_or_none(info.get('lengthSeconds')), 'thumbnails': self._get_thumbnails(thumbnail), - 'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str), - 'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str), + 'uploader': try_get(info, lambda x: x['owner']['displayName'], str), + 'uploader_id': try_get(info, lambda x: x['owner']['login'], str), 'timestamp': unified_timestamp(info.get('publishedAt')), 'view_count': int_or_none(info.get('viewCount')), 'chapters': list(self._extract_chapters(info, item_id)), @@ -559,8 +555,8 @@ class TwitchVodIE(TwitchBaseIE): self._prefer_source(formats) info['formats'] = formats - parsed_url = compat_urllib_parse_urlparse(url) - 
query = compat_parse_qs(parsed_url.query) + parsed_url = urllib.parse.urlparse(url) + query = urllib.parse.parse_qs(parsed_url.query) if 't' in query: info['start_time'] = parse_duration(query['t'][0]) @@ -568,7 +564,7 @@ class TwitchVodIE(TwitchBaseIE): info['subtitles'] = { 'rechat': [{ 'url': update_url_query( - 'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, { + f'https://api.twitch.tv/v5/videos/{vod_id}/comments', { 'client_id': self._CLIENT_ID, }), 'ext': 'json', @@ -587,7 +583,7 @@ def _make_video_result(node): '_type': 'url_transparent', 'ie_key': TwitchVodIE.ie_key(), 'id': 'v' + video_id, - 'url': 'https://www.twitch.tv/videos/%s' % video_id, + 'url': f'https://www.twitch.tv/videos/{video_id}', 'title': node.get('title'), 'thumbnail': node.get('previewThumbnailURL'), 'duration': float_or_none(node.get('lengthSeconds')), @@ -638,7 +634,7 @@ class TwitchPlaylistBaseIE(TwitchBaseIE): def _entries(self, channel_name, *args): cursor = None variables_common = self._make_variables(channel_name, *args) - entries_key = '%ss' % self._ENTRY_KIND + entries_key = f'{self._ENTRY_KIND}s' for page_num in itertools.count(1): variables = variables_common.copy() variables['limit'] = self._PAGE_LIMIT @@ -649,7 +645,7 @@ class TwitchPlaylistBaseIE(TwitchBaseIE): 'operationName': self._OPERATION_NAME, 'variables': variables, }], - 'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num), + f'Downloading {self._NODE_KIND}s GraphQL page {page_num}', fatal=False) if not page: break @@ -671,7 +667,7 @@ class TwitchPlaylistBaseIE(TwitchBaseIE): if entry: cursor = edge.get('cursor') yield entry - if not cursor or not isinstance(cursor, compat_str): + if not cursor or not isinstance(cursor, str): break @@ -765,7 +761,7 @@ class TwitchVideosIE(TwitchPlaylistBaseIE): if any(ie.suitable(url) for ie in ( TwitchVideosClipsIE, TwitchVideosCollectionsIE)) - else super(TwitchVideosIE, cls).suitable(url)) + else super().suitable(url)) @staticmethod def 
_make_variables(channel_name, broadcast_type, sort): @@ -782,15 +778,15 @@ class TwitchVideosIE(TwitchPlaylistBaseIE): def _real_extract(self, url): channel_name = self._match_id(url) qs = parse_qs(url) - filter = qs.get('filter', ['all'])[0] + video_filter = qs.get('filter', ['all'])[0] sort = qs.get('sort', ['time'])[0] - broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST) + broadcast = self._BROADCASTS.get(video_filter, self._DEFAULT_BROADCAST) return self.playlist_result( self._entries(channel_name, broadcast.type, sort), playlist_id=channel_name, - playlist_title='%s - %s sorted by %s' - % (channel_name, broadcast.label, - self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY))) + playlist_title=( + f'{channel_name} - {broadcast.label} ' + f'sorted by {self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY)}')) class TwitchVideosClipsIE(TwitchPlaylistBaseIE): @@ -828,11 +824,11 @@ class TwitchVideosClipsIE(TwitchPlaylistBaseIE): _NODE_KIND = 'Clip' @staticmethod - def _make_variables(channel_name, filter): + def _make_variables(channel_name, channel_filter): return { 'login': channel_name, 'criteria': { - 'filter': filter, + 'filter': channel_filter, }, } @@ -858,12 +854,12 @@ class TwitchVideosClipsIE(TwitchPlaylistBaseIE): def _real_extract(self, url): channel_name = self._match_id(url) qs = parse_qs(url) - range = qs.get('range', ['7d'])[0] - clip = self._RANGE.get(range, self._DEFAULT_CLIP) + date_range = qs.get('range', ['7d'])[0] + clip = self._RANGE.get(date_range, self._DEFAULT_CLIP) return self.playlist_result( self._entries(channel_name, clip.filter), playlist_id=channel_name, - playlist_title='%s - Clips %s' % (channel_name, clip.label)) + playlist_title=f'{channel_name} - Clips {clip.label}') class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE): @@ -907,7 +903,7 @@ class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE): '_type': 'url_transparent', 'ie_key': TwitchCollectionIE.ie_key(), 'id': collection_id, - 'url': 
'https://www.twitch.tv/collections/%s' % collection_id, + 'url': f'https://www.twitch.tv/collections/{collection_id}', 'title': node.get('title'), 'thumbnail': node.get('thumbnailURL'), 'duration': float_or_none(node.get('lengthSeconds')), @@ -919,7 +915,7 @@ class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE): channel_name = self._match_id(url) return self.playlist_result( self._entries(channel_name), playlist_id=channel_name, - playlist_title='%s - Collections' % channel_name) + playlist_title=f'{channel_name} - Collections') class TwitchStreamIE(TwitchBaseIE): @@ -996,7 +992,7 @@ class TwitchStreamIE(TwitchBaseIE): TwitchVideosClipsIE, TwitchVideosCollectionsIE, TwitchClipsIE)) - else super(TwitchStreamIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): channel_name = self._match_id(url).lower() @@ -1025,7 +1021,7 @@ class TwitchStreamIE(TwitchBaseIE): if not user: raise ExtractorError( - '%s does not exist' % channel_name, expected=True) + f'{channel_name} does not exist', expected=True) stream = user['stream'] @@ -1046,16 +1042,16 @@ class TwitchStreamIE(TwitchBaseIE): sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {} uploader = sq_user.get('displayName') description = try_get( - sq_user, lambda x: x['broadcastSettings']['title'], compat_str) + sq_user, lambda x: x['broadcastSettings']['title'], str) thumbnail = url_or_none(try_get( gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'], - compat_str)) + str)) title = uploader or channel_name stream_type = stream.get('type') if stream_type in ['rerun', 'live']: - title += ' (%s)' % stream_type + title += f' ({stream_type})' return { 'id': stream_id, @@ -1165,7 +1161,7 @@ class TwitchClipsIE(TwitchBaseIE): } viewCount } -}''' % video_id}, 'Downloading clip GraphQL', fatal=False) +}''' % video_id}, 'Downloading clip GraphQL', fatal=False) # noqa: UP031 if data: clip = try_get(data, lambda x: x['data']['clip'], dict) or clip @@ -1213,7 +1209,7 @@ class 
TwitchClipsIE(TwitchBaseIE): 'view_count': int_or_none(clip.get('viewCount')), 'timestamp': unified_timestamp(clip.get('createdAt')), 'thumbnails': thumbnails, - 'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str), - 'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str), - 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str), + 'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], str), + 'uploader': try_get(clip, lambda x: x['curator']['displayName'], str), + 'uploader_id': try_get(clip, lambda x: x['curator']['id'], str), } diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 1a11162..d056797 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -2,14 +2,10 @@ import functools import json import random import re +import urllib.parse from .common import InfoExtractor from .periscope import PeriscopeBaseIE, PeriscopeIE -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_unquote, - compat_urllib_parse_urlparse, -) from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -18,6 +14,7 @@ from ..utils import ( float_or_none, format_field, int_or_none, + join_nonempty, make_archive_id, remove_end, str_or_none, @@ -46,9 +43,9 @@ class TwitterBaseIE(InfoExtractor): 'flow_context': { 'debug_overrides': {}, 'start_location': { - 'location': 'unknown' - } - } + 'location': 'unknown', + }, + }, }, 'subtask_versions': { 'action_list': 2, @@ -91,8 +88,8 @@ class TwitterBaseIE(InfoExtractor): 'user_recommendations_list': 4, 'user_recommendations_urt': 1, 'wait_spinner': 3, - 'web_modal': 1 - } + 'web_modal': 1, + }, }, separators=(',', ':')).encode() def _extract_variant_formats(self, variant, video_id): @@ -111,7 +108,7 @@ class TwitterBaseIE(InfoExtractor): tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None f = { 'url': variant_url, - 'format_id': 'http' + ('-%d' % tbr if tbr else 
''), + 'format_id': join_nonempty('http', tbr), 'tbr': tbr, } self._search_dimensions_in_video_url(f, variant_url) @@ -126,7 +123,7 @@ class TwitterBaseIE(InfoExtractor): subtitles = {} urls = [] for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'): - video_variant.attrib['url'] = compat_urllib_parse_unquote( + video_variant.attrib['url'] = urllib.parse.unquote( video_variant.attrib['url']) urls.append(video_variant.attrib['url']) fmts, subs = self._extract_variant_formats( @@ -218,7 +215,7 @@ class TwitterBaseIE(InfoExtractor): def build_login_json(*subtask_inputs): return json.dumps({ 'flow_token': self._flow_token, - 'subtask_inputs': subtask_inputs + 'subtask_inputs': subtask_inputs, }, separators=(',', ':')).encode() def input_dict(subtask_id, text): @@ -226,8 +223,8 @@ class TwitterBaseIE(InfoExtractor): 'subtask_id': subtask_id, 'enter_text': { 'text': text, - 'link': 'next_link' - } + 'link': 'next_link', + }, } next_subtask = self._call_login_api( @@ -240,8 +237,8 @@ class TwitterBaseIE(InfoExtractor): 'subtask_id': next_subtask, 'js_instrumentation': { 'response': '{}', - 'link': 'next_link' - } + 'link': 'next_link', + }, })) elif next_subtask == 'LoginEnterUserIdentifierSSO': @@ -253,12 +250,12 @@ class TwitterBaseIE(InfoExtractor): 'key': 'user_identifier', 'response_data': { 'text_data': { - 'result': username - } - } + 'result': username, + }, + }, }], - 'link': 'next_link' - } + 'link': 'next_link', + }, })) elif next_subtask == 'LoginEnterAlternateIdentifierSubtask': @@ -273,8 +270,8 @@ class TwitterBaseIE(InfoExtractor): 'subtask_id': next_subtask, 'enter_password': { 'password': password, - 'link': 'next_link' - } + 'link': 'next_link', + }, })) elif next_subtask == 'AccountDuplicationCheck': @@ -282,8 +279,8 @@ class TwitterBaseIE(InfoExtractor): 'Submitting account duplication check', headers, data=build_login_json({ 'subtask_id': next_subtask, 'check_logged_in_account': { - 'link': 
'AccountDuplicationCheck_false' - } + 'link': 'AccountDuplicationCheck_false', + }, })) elif next_subtask == 'LoginTwoFactorAuthChallenge': @@ -317,7 +314,7 @@ class TwitterBaseIE(InfoExtractor): 'x-twitter-client-language': 'en', 'x-twitter-active-user': 'yes', } if self.is_logged_in else { - 'x-guest-token': self._fetch_guest_token(video_id) + 'x-guest-token': self._fetch_guest_token(video_id), }) allowed_status = {400, 401, 403, 404} if graphql else {403} result = self._download_json( @@ -388,7 +385,7 @@ class TwitterCardIE(InfoExtractor): 'repost_count': int, 'tags': ['PlutoFlyby'], }, - 'params': {'format': '[protocol=https]'} + 'params': {'format': '[protocol=https]'}, }, { 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977', @@ -1221,7 +1218,7 @@ class TwitterIE(TwitterBaseIE): 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+', 'age_limit': 0, '_old_archive_ids': ['twitter 1790637656616943991'], - } + }, }, { # onion route 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273', @@ -1374,11 +1371,11 @@ class TwitterIE(TwitterBaseIE): 'responsive_web_media_download_video_enabled': False, 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False, 'responsive_web_graphql_timeline_navigation_enabled': True, - 'responsive_web_enhance_cards_enabled': False + 'responsive_web_enhance_cards_enabled': False, }, 'fieldToggles': { - 'withArticleRichContentState': False - } + 'withArticleRichContentState': False, + }, } def _call_syndication_api(self, twid): @@ -1644,9 +1641,9 @@ class TwitterAmplifyIE(TwitterBaseIE): def _find_dimension(target): w = int_or_none(self._html_search_meta( - 'twitter:%s:width' % target, webpage, fatal=False)) + f'twitter:{target}:width', webpage, fatal=False)) h = int_or_none(self._html_search_meta( - 'twitter:%s:height' % target, webpage, fatal=False)) + f'twitter:{target}:height', webpage, fatal=False)) return w, h if thumbnail: @@ 
-1740,7 +1737,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE): m3u8_url = source.get('noRedirectPlaybackUrl') or source['location'] if '/live_video_stream/geoblocked/' in m3u8_url: self.raise_geo_restricted() - m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse( + m3u8_id = urllib.parse.parse_qs(urllib.parse.urlparse( m3u8_url).query).get('type', [None])[0] state, width, height = self._extract_common_format_info(broadcast) info['formats'] = self._extract_pscp_m3u8_formats( @@ -1895,12 +1892,12 @@ class TwitterShortenerIE(TwitterBaseIE): def _real_extract(self, url): mobj = self._match_valid_url(url) - eid, id = mobj.group('eid', 'id') + eid, shortcode = mobj.group('eid', 'id') if eid: - id = eid - url = self._BASE_URL + id - new_url = self._request_webpage(url, id, headers={'User-Agent': 'curl'}).url - __UNSAFE_LINK = "https://twitter.com/safety/unsafe_link_warning?unsafe_link=" + shortcode = eid + url = self._BASE_URL + shortcode + new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).url + __UNSAFE_LINK = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link=' if new_url.startswith(__UNSAFE_LINK): - new_url = new_url.replace(__UNSAFE_LINK, "") + new_url = new_url.replace(__UNSAFE_LINK, '') return self.url_result(new_url) diff --git a/yt_dlp/extractor/txxx.py b/yt_dlp/extractor/txxx.py index 77dabbc..488c13b 100644 --- a/yt_dlp/extractor/txxx.py +++ b/yt_dlp/extractor/txxx.py @@ -85,7 +85,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.txxx.tube/contents/videos_sources/16574000/16574965/screenshots/1.jpg', - } + }, }, { 'url': 'https://txxx.tube/videos/16574965/digital-desire-malena-morgan/', 'md5': 'c54e4ace54320aaf8e2a72df87859391', @@ -101,7 +101,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.txxx.tube/contents/videos_sources/16574000/16574965/screenshots/1.jpg', - } + }, }, { 'url': 
'https://vxxx.com/video-68925/', 'md5': '1fcff3748b0c5b41fe41d0afa22409e1', @@ -117,7 +117,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.vxxx.com/contents/videos_sources/68000/68925/screenshots/1.jpg', - } + }, }, { 'url': 'https://hclips.com/videos/6291073/malena-morgan-masturbates-her-sweet/', 'md5': 'a5dd4f83363972ee043313cff85e7e26', @@ -133,7 +133,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/6291000/6291073/screenshots/1.jpg', - } + }, }, { 'url': 'https://hdzog.com/videos/67063/gorgeous-malena-morgan-will-seduce-you-at-the-first-glance/', 'md5': 'f8bdedafd45d1ec2875c43fe33a846d3', @@ -149,7 +149,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.hdzog.com/contents/videos_sources/67000/67063/screenshots/1.jpg', - } + }, }, { 'url': 'https://hdzog.tube/videos/67063/gorgeous-malena-morgan-will-seduce-you-at-the-first-glance/', 'md5': 'f8bdedafd45d1ec2875c43fe33a846d3', @@ -165,7 +165,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.hdzog.com/contents/videos_sources/67000/67063/screenshots/1.jpg', - } + }, }, { 'url': 'https://hotmovs.com/videos/8789287/unbelievable-malena-morgan-performing-in-incredible-masturantion/', 'md5': '71d32c51584876472db87e561171a386', @@ -181,7 +181,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.hotmovs.com/contents/videos_sources/8789000/8789287/screenshots/10.jpg', - } + }, }, { 'url': 'https://hotmovs.tube/videos/8789287/unbelievable-malena-morgan-performing-in-incredible-masturantion/', 'md5': '71d32c51584876472db87e561171a386', @@ -197,7 +197,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.hotmovs.com/contents/videos_sources/8789000/8789287/screenshots/10.jpg', - } + }, }, { 'url': 
'https://inporn.com/video/517897/malena-morgan-solo/', 'md5': '344db467481edf78f193cdf5820a7cfb', @@ -213,7 +213,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://iptn.m3pd.com/media/tn/sources/517897_1.jpg', - } + }, }, { 'url': 'https://privatehomeclips.com/videos/3630599/malena-morgan-cam-show/', 'md5': 'ea657273e352493c5fb6357fbfa4f126', @@ -229,7 +229,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/3630000/3630599/screenshots/15.jpg', - } + }, }, { 'url': 'https://tubepornclassic.com/videos/1015455/mimi-rogers-full-body-massage-nude-compilation/', 'md5': '2e9a6cf610c9862e86e0ce24f08f4427', @@ -245,7 +245,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.tubepornclassic.com/contents/videos_sources/1015000/1015455/screenshots/6.jpg', - } + }, }, { 'url': 'https://upornia.com/videos/1498858/twistys-malena-morgan-starring-at-dr-morgan-baller/', 'md5': '7ff7033340bc88a173198b7c22600e4f', @@ -261,7 +261,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.upornia.com/contents/videos_sources/1498000/1498858/screenshots/1.jpg', - } + }, }, { 'url': 'https://upornia.tube/videos/1498858/twistys-malena-morgan-starring-at-dr-morgan-baller/', 'md5': '7ff7033340bc88a173198b7c22600e4f', @@ -277,7 +277,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.upornia.com/contents/videos_sources/1498000/1498858/screenshots/1.jpg', - } + }, }, { 'url': 'https://vjav.com/videos/11761/yui-hatano-in-if-yui-was-my-girlfriend2/', 'md5': '6de5bc1f13bdfc3491a77f23edb1676f', @@ -293,7 +293,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.vjav.com/contents/videos_sources/11000/11761/screenshots/23.jpg', - } + }, }, { 'url': 
'https://vjav.tube/videos/11761/yui-hatano-in-if-yui-was-my-girlfriend2/', 'md5': '6de5bc1f13bdfc3491a77f23edb1676f', @@ -309,7 +309,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.vjav.com/contents/videos_sources/11000/11761/screenshots/23.jpg', - } + }, }, { 'url': 'https://voyeurhit.com/videos/332875/charlotte-stokely-elle-alexandra-malena-morgan-lingerie/', 'md5': '12b4666e9c3e60dafe9182e5d12aae33', @@ -325,7 +325,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.voyeurhit.com/contents/videos_sources/332000/332875/screenshots/1.jpg', - } + }, }, { 'url': 'https://voyeurhit.tube/videos/332875/charlotte-stokely-elle-alexandra-malena-morgan-lingerie/', 'md5': '12b4666e9c3e60dafe9182e5d12aae33', @@ -341,7 +341,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://tn.voyeurhit.com/contents/videos_sources/332000/332875/screenshots/1.jpg', - } + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://pornzog.com/video/9125519/michelle-malone-dreamgirls-wild-wet-3/', @@ -357,7 +357,7 @@ class TxxxIE(InfoExtractor): 'dislike_count': int, 'age_limit': 18, 'thumbnail': 'https://hctn.nv7s.com/contents/videos_sources/5119000/5119660/screenshots/1.jpg', - } + }, }] def _call_api(self, url, video_id, fatal=False, **kwargs): @@ -415,7 +415,7 @@ class PornTopIE(InfoExtractor): 'timestamp': 1609455029, 'upload_date': '20201231', 'thumbnail': 'https://tn.porntop.com/media/tn/sources/101569_1.jpg', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py index 5c29605..2801aa8 100644 --- a/yt_dlp/extractor/udemy.py +++ b/yt_dlp/extractor/udemy.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_str, compat_urlparse from ..networking import Request from ..networking.exceptions import HTTPError from ..utils import ( @@ -73,22 +73,21 @@ class 
UdemyIE(InfoExtractor): course_id = course.get('id') or self._search_regex( [ r'data-course-id=["\'](\d+)', - r'"courseId"\s*:\s*(\d+)' + r'"courseId"\s*:\s*(\d+)', ], webpage, 'course id') return course_id, course.get('title') def _enroll_course(self, base_url, webpage, course_id): def combine_url(base_url, url): - return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url + return urllib.parse.urljoin(base_url, url) if not url.startswith('http') else url checkout_url = unescapeHTML(self._search_regex( r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1', webpage, 'checkout url', group='url', default=None)) if checkout_url: raise ExtractorError( - 'Course %s is not free. You have to pay for it before you can download. ' - 'Use this URL to confirm purchase: %s' - % (course_id, combine_url(base_url, checkout_url)), + f'Course {course_id} is not free. You have to pay for it before you can download. ' + f'Use this URL to confirm purchase: {combine_url(base_url, checkout_url)}', expected=True) enroll_url = unescapeHTML(self._search_regex( @@ -100,12 +99,11 @@ class UdemyIE(InfoExtractor): course_id, 'Enrolling in the course', headers={'Referer': base_url}) if '>You have enrolled in' in webpage: - self.to_screen('%s: Successfully enrolled in the course' % course_id) + self.to_screen(f'{course_id}: Successfully enrolled in the course') def _download_lecture(self, course_id, lecture_id): return self._download_json( - 'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?' 
- % (course_id, lecture_id), + f'https://www.udemy.com/api-2.0/users/me/subscribed-courses/{course_id}/lectures/{lecture_id}?', lecture_id, 'Downloading lecture JSON', query={ 'fields[lecture]': 'title,description,view_html,asset', 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data,course_is_drmed', @@ -116,17 +114,17 @@ class UdemyIE(InfoExtractor): return error = response.get('error') if error: - error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message')) + error_str = 'Udemy returned error #{}: {}'.format(error.get('code'), error.get('message')) error_data = error.get('data') if error_data: - error_str += ' - %s' % error_data.get('formErrors') + error_str += ' - {}'.format(error_data.get('formErrors')) raise ExtractorError(error_str, expected=True) def _download_webpage_handle(self, *args, **kwargs): headers = kwargs.get('headers', {}).copy() headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' kwargs['headers'] = headers - ret = super(UdemyIE, self)._download_webpage_handle( + ret = super()._download_webpage_handle( *args, **kwargs) if not ret: return ret @@ -151,14 +149,14 @@ class UdemyIE(InfoExtractor): headers['X-Udemy-Client-Id'] = cookie.value elif cookie.name == 'access_token': headers['X-Udemy-Bearer-Token'] = cookie.value - headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value + headers['X-Udemy-Authorization'] = f'Bearer {cookie.value}' if isinstance(url_or_request, Request): url_or_request.headers.update(headers) else: url_or_request = Request(url_or_request, headers=headers) - response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs) + response = super()._download_json(url_or_request, *args, **kwargs) self._handle_error(response) return response @@ -195,7 +193,7 @@ class UdemyIE(InfoExtractor): r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>', response, 'error 
message', default=None) if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError(f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') def _real_extract(self, url): @@ -226,7 +224,7 @@ class UdemyIE(InfoExtractor): asset_type = asset.get('asset_type') or asset.get('assetType') if asset_type != 'Video': raise ExtractorError( - 'Lecture %s is not a video' % lecture_id, expected=True) + f'Lecture {lecture_id} is not a video', expected=True) stream_url = asset.get('stream_url') or asset.get('streamUrl') if stream_url: @@ -235,7 +233,7 @@ class UdemyIE(InfoExtractor): if youtube_url: return self.url_result(youtube_url, 'Youtube') - video_id = compat_str(asset['id']) + video_id = str(asset['id']) thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl') duration = float_or_none(asset.get('data', {}).get('duration')) @@ -287,7 +285,7 @@ class UdemyIE(InfoExtractor): format_id = source.get('label') f = { 'url': video_url, - 'format_id': '%sp' % format_id, + 'format_id': f'{format_id}p', 'height': int_or_none(format_id), } if format_id: @@ -316,7 +314,7 @@ class UdemyIE(InfoExtractor): }) for url_kind in ('download', 'stream'): - urls = asset.get('%s_urls' % url_kind) + urls = asset.get(f'{url_kind}_urls') if isinstance(urls, dict): extract_formats(urls.get('Video')) @@ -328,7 +326,7 @@ class UdemyIE(InfoExtractor): cc_url = url_or_none(cc.get('url')) if not cc_url: continue - lang = try_get(cc, lambda x: x['locale']['locale'], compat_str) + lang = try_get(cc, lambda x: x['locale']['locale'], str) sub_dict = (automatic_captions if cc.get('source') == 'auto' else subtitles) sub_dict.setdefault(lang or 'en', []).append({ @@ -363,7 +361,7 @@ class UdemyIE(InfoExtractor): else: formats.append(add_output_format_meta({ 'url': src, - 'format_id': '%dp' % height if height else None, + 'format_id': f'{height}p' if height else None, 'height': height, }, res)) @@ -423,7 +421,7 @@ class 
UdemyCourseIE(UdemyIE): # XXX: Do not subclass from concrete IE @classmethod def suitable(cls, url): - return False if UdemyIE.suitable(url) else super(UdemyCourseIE, cls).suitable(url) + return False if UdemyIE.suitable(url) else super().suitable(url) def _real_extract(self, url): course_path = self._match_id(url) @@ -435,7 +433,7 @@ class UdemyCourseIE(UdemyIE): # XXX: Do not subclass from concrete IE self._enroll_course(url, webpage, course_id) response = self._download_json( - 'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id, + f'https://www.udemy.com/api-2.0/courses/{course_id}/cached-subscriber-curriculum-items', course_id, 'Downloading course curriculum', query={ 'fields[chapter]': 'title,object_index', 'fields[lecture]': 'title,asset', diff --git a/yt_dlp/extractor/udn.py b/yt_dlp/extractor/udn.py index d5849d2..9970e4f 100644 --- a/yt_dlp/extractor/udn.py +++ b/yt_dlp/extractor/udn.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( determine_ext, int_or_none, @@ -13,7 +13,7 @@ class UDNEmbedIE(InfoExtractor): IE_DESC = '聯合影音' _PROTOCOL_RELATIVE_VALID_URL = r'//video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)' _VALID_URL = r'https?:' + _PROTOCOL_RELATIVE_VALID_URL - _EMBED_REGEX = [r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % _PROTOCOL_RELATIVE_VALID_URL] + _EMBED_REGEX = [rf'<iframe[^>]+src="(?:https?:)?(?P<url>{_PROTOCOL_RELATIVE_VALID_URL})"'] _TESTS = [{ 'url': 'http://video.udn.com/embed/news/300040', 'info_dict': { @@ -66,8 +66,8 @@ class UDNEmbedIE(InfoExtractor): continue video_url = self._download_webpage( - compat_urlparse.urljoin(url, api_url), video_id, - note='retrieve url for %s video' % video_type) + urllib.parse.urljoin(url, api_url), video_id, + note=f'retrieve url for {video_type} video') ext = determine_ext(video_url) if ext == 'm3u8': diff --git a/yt_dlp/extractor/uktvplay.py 
b/yt_dlp/extractor/uktvplay.py index ab22a8e..9abe343 100644 --- a/yt_dlp/extractor/uktvplay.py +++ b/yt_dlp/extractor/uktvplay.py @@ -18,7 +18,7 @@ class UKTVPlayIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - 'expected_warnings': ['Failed to download MPD manifest'] + 'expected_warnings': ['Failed to download MPD manifest'], }, { 'url': 'https://uktvplay.uktv.co.uk/shows/africa/watch-online/5983349675001', 'only_matching': True, diff --git a/yt_dlp/extractor/umg.py b/yt_dlp/extractor/umg.py index 1da4ecd..b509fda 100644 --- a/yt_dlp/extractor/umg.py +++ b/yt_dlp/extractor/umg.py @@ -20,7 +20,7 @@ class UMGDeIE(InfoExtractor): 'title': 'Jedes Wort ist Gold wert', 'timestamp': 1513591800, 'upload_date': '20171218', - } + }, } def _real_extract(self, url): @@ -45,7 +45,7 @@ class UMGDeIE(InfoExtractor): createdDate } } -}''' % video_id})['data']['universalMusic']['video'] +}''' % video_id})['data']['universalMusic']['video'] # noqa: UP031 title = video_data['headline'] hls_url_template = 'http://mediadelivery.universal-music-services.de/vod/mp4:autofill/storage/' + '/'.join(list(video_id)) + '/content/%s/file/playlist.m3u8' diff --git a/yt_dlp/extractor/unistra.py b/yt_dlp/extractor/unistra.py index 6e872cd..edc1f85 100644 --- a/yt_dlp/extractor/unistra.py +++ b/yt_dlp/extractor/unistra.py @@ -27,7 +27,7 @@ class UnistraIE(InfoExtractor): 'title': 'Prix Louise Weiss 2014', 'description': 'md5:cc3a8735f079f4fb6b0b570fc10c135a', }, - } + }, ] def _real_extract(self, url): @@ -43,9 +43,9 @@ class UnistraIE(InfoExtractor): for file_path in files: format_id = 'HD' if file_path.endswith('-HD.mp4') else 'SD' formats.append({ - 'url': 'http://vod-flash.u-strasbg.fr:8080%s' % file_path, + 'url': f'http://vod-flash.u-strasbg.fr:8080{file_path}', 'format_id': format_id, - 'quality': quality(format_id) + 'quality': quality(format_id), }) title = self._html_search_regex( @@ -60,5 +60,5 @@ class UnistraIE(InfoExtractor): 'title': title, 'description': description, 
'thumbnail': thumbnail, - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/unity.py b/yt_dlp/extractor/unity.py index 6d8bc05..7d6ffb0 100644 --- a/yt_dlp/extractor/unity.py +++ b/yt_dlp/extractor/unity.py @@ -16,7 +16,7 @@ class UnityIE(InfoExtractor): 'uploader': 'Unity', 'uploader_id': 'Unity3D', 'upload_date': '20140926', - } + }, }, { 'url': 'https://unity3d.com/learn/tutorials/projects/2d-ufo-tutorial/following-player-camera?playlist=25844', 'only_matching': True, diff --git a/yt_dlp/extractor/uol.py b/yt_dlp/extractor/uol.py index 068c2b8..5198e89 100644 --- a/yt_dlp/extractor/uol.py +++ b/yt_dlp/extractor/uol.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) from ..utils import ( clean_html, int_or_none, @@ -26,7 +24,7 @@ class UOLIE(InfoExtractor): 'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2', 'timestamp': 1470421860, 'upload_date': '20160805', - } + }, }, { 'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326', 'md5': '2850a0e8dfa0a7307e04a96c5bdc5bc2', @@ -37,7 +35,7 @@ class UOLIE(InfoExtractor): 'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. 
Não há informações sobre vítimas.', 'timestamp': 1470674520, 'upload_date': '20160808', - } + }, }, { 'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931', 'only_matching': True, @@ -68,12 +66,12 @@ class UOLIE(InfoExtractor): # https://api.mais.uol.com.br/apiuol/v4/player/data/[MEDIA_ID] 'https://api.mais.uol.com.br/apiuol/v3/media/detail/' + video_id, video_id)['item'] - media_id = compat_str(video_data['mediaId']) + media_id = str(video_data['mediaId']) title = video_data['title'] ver = video_data.get('revision', 2) uol_formats = self._download_json( - 'https://croupier.mais.uol.com.br/v3/formats/%s/jsonp' % media_id, + f'https://croupier.mais.uol.com.br/v3/formats/{media_id}/jsonp', media_id) quality = qualities(['mobile', 'WEBM', '360p', '720p', '1080p']) formats = [] @@ -96,7 +94,7 @@ class UOLIE(InfoExtractor): m3u8_formats = self._extract_m3u8_formats( f_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - encoded_query = compat_urllib_parse_urlencode(query) + encoded_query = urllib.parse.urlencode(query) for m3u8_f in m3u8_formats: m3u8_f['extra_param_to_segment_url'] = encoded_query m3u8_f['url'] = update_url_query(m3u8_f['url'], query) diff --git a/yt_dlp/extractor/urort.py b/yt_dlp/extractor/urort.py index f14d7cc..0693129 100644 --- a/yt_dlp/extractor/urort.py +++ b/yt_dlp/extractor/urort.py @@ -23,22 +23,22 @@ class UrortIE(InfoExtractor): }, 'params': { 'matchtitle': '^The Bomb$', # To test, we want just one video - } + }, } def _real_extract(self, url): playlist_id = self._match_id(url) - fstr = urllib.parse.quote("InternalBandUrl eq '%s'" % playlist_id) - json_url = 'http://urort.p3.no/breeze/urort/TrackDTOViews?$filter=%s&$orderby=Released%%20desc&$expand=Tags%%2CFiles' % fstr + fstr = urllib.parse.quote(f"InternalBandUrl eq '{playlist_id}'") + json_url = f'http://urort.p3.no/breeze/urort/TrackDTOViews?$filter={fstr}&$orderby=Released%20desc&$expand=Tags%2CFiles' songs = self._download_json(json_url, 
playlist_id) entries = [] for s in songs: formats = [{ 'tbr': f.get('Quality'), 'ext': f['FileType'], - 'format_id': '%s-%s' % (f['FileType'], f.get('Quality', '')), - 'url': 'http://p3urort.blob.core.windows.net/tracks/%s' % f['FileRef'], + 'format_id': '{}-{}'.format(f['FileType'], f.get('Quality', '')), + 'url': 'http://p3urort.blob.core.windows.net/tracks/{}'.format(f['FileRef']), 'quality': 3 if f['FileType'] == 'mp3' else 2, } for f in s['Files']] e = { @@ -46,7 +46,7 @@ class UrortIE(InfoExtractor): 'title': s['Title'], 'uploader_id': playlist_id, 'uploader': s.get('BandName', playlist_id), - 'thumbnail': 'http://urort.p3.no/cloud/images/%s' % s['Image'], + 'thumbnail': 'http://urort.p3.no/cloud/images/{}'.format(s['Image']), 'upload_date': unified_strdate(s.get('Released')), 'formats': formats, } diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py index 928e6e1..79bb8a8 100644 --- a/yt_dlp/extractor/urplay.py +++ b/yt_dlp/extractor/urplay.py @@ -98,7 +98,7 @@ class URPlayIE(InfoExtractor): file_http = v.get('location') if file_http: formats.extend(self._extract_wowza_formats( - 'http://%s/%splaylist.m3u8' % (host, file_http), + f'http://{host}/{file_http}playlist.m3u8', video_id, skip_protocols=['f4m', 'rtmp', 'rtsp'])) subtitles = {} @@ -116,14 +116,14 @@ class URPlayIE(InfoExtractor): for k, v in stream.items(): if (k in ('sd', 'hd') or not isinstance(v, dict)): continue - lang, sttl_url = (v.get(kk) for kk in ('language', 'location', )) + lang, sttl_url = (v.get(kk) for kk in ('language', 'location')) if not sttl_url: continue lang = parse_lang_code(lang) if not lang: continue sttl = subtitles.get(lang) or [] - sttl.append({'ext': k, 'url': sttl_url, }) + sttl.append({'ext': k, 'url': sttl_url}) subtitles[lang] = sttl image = urplayer_data.get('image') or {} @@ -146,7 +146,7 @@ class URPlayIE(InfoExtractor): return { 'id': video_id, - 'title': '%s : %s' % (series_title, episode) if series_title else episode, + 'title': f'{series_title} : 
{episode}' if series_title else episode, 'description': urplayer_data.get('description'), 'thumbnails': thumbnails, 'timestamp': unified_timestamp(urplayer_data.get('publishedAt')), diff --git a/yt_dlp/extractor/usatoday.py b/yt_dlp/extractor/usatoday.py index 42a28c5..6ad18ff 100644 --- a/yt_dlp/extractor/usatoday.py +++ b/yt_dlp/extractor/usatoday.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, get_element_by_attribute, @@ -23,7 +22,7 @@ class USATodayIE(InfoExtractor): 'description': 'md5:7e50464fdf2126b0f533748d3c78d58f', 'uploader_id': '29906170001', 'upload_date': '20160313', - } + }, }, { # ui-video-data[asset_metadata][items][brightcoveaccount] = 28911775001 'url': 'https://www.usatoday.com/story/tech/science/2018/08/21/yellowstone-supervolcano-eruption-stop-worrying-its-blow/973633002/', @@ -35,7 +34,7 @@ class USATodayIE(InfoExtractor): 'description': 'md5:3715e7927639a4f16b474e9391687c62', 'uploader_id': '28911775001', 'upload_date': '20180820', - } + }, }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' @@ -51,7 +50,7 @@ class USATodayIE(InfoExtractor): return { '_type': 'url_transparent', 'url': self.BRIGHTCOVE_URL_TEMPLATE % (item.get('brightcoveaccount', '29906170001'), item.get('brightcoveid') or video_data['brightcove_id']), - 'id': compat_str(video_data['id']), + 'id': str(video_data['id']), 'title': video_data['title'], 'thumbnail': video_data.get('thumbnail'), 'description': video_data.get('description'), diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py index 046e3d7..33cf8f4 100644 --- a/yt_dlp/extractor/ustream.py +++ b/yt_dlp/extractor/ustream.py @@ -1,11 +1,8 @@ import random import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..utils import ( ExtractorError, encode_data_uri, @@ -82,14 +79,14 @@ class 
UstreamIE(InfoExtractor): extra_note = '' conn_info = self._download_json( - 'http://r%d-1-%s-recorded-lp-live.ums.ustream.tv/1/ustream' % (rnd(1e8), video_id), + f'http://r{rnd(1e8)}-1-{video_id}-recorded-lp-live.ums.ustream.tv/1/ustream', video_id, note='Downloading connection info' + extra_note, query={ 'type': 'viewer', 'appId': app_id_ver[0], 'appVersion': app_id_ver[1], - 'rsid': '%s:%s' % (num_to_hex(rnd(1e8)), num_to_hex(rnd(1e8))), - 'rpin': '_rpin.%d' % rnd(1e15), + 'rsid': f'{num_to_hex(rnd(1e8))}:{num_to_hex(rnd(1e8))}', + 'rpin': f'_rpin.{rnd(1e15)}', 'referrer': url, 'media': video_id, 'application': 'recorded', @@ -98,7 +95,7 @@ class UstreamIE(InfoExtractor): connection_id = conn_info[0]['args'][0]['connectionId'] return self._download_json( - 'http://%s/1/ustream?connectionId=%s' % (host, connection_id), + f'http://{host}/1/ustream?connectionId={connection_id}', video_id, note='Downloading stream info' + extra_note) def _get_streams(self, url, video_id, app_id_ver): @@ -106,14 +103,14 @@ class UstreamIE(InfoExtractor): for trial_count in range(3): stream_info = self._get_stream_info( url, video_id, app_id_ver, - extra_note=' (try %d)' % (trial_count + 1) if trial_count > 0 else '') + extra_note=f' (try {trial_count + 1})' if trial_count > 0 else '') if 'stream' in stream_info[0]['args'][0]: return stream_info[0]['args'][0]['stream'] return [] def _parse_segmented_mp4(self, dash_stream_info): def resolve_dash_template(template, idx, chunk_hash): - return template.replace('%', compat_str(idx), 1).replace('%', chunk_hash) + return template.replace('%', str(idx), 1).replace('%', chunk_hash) formats = [] for stream in dash_stream_info['streams']: @@ -121,13 +118,13 @@ class UstreamIE(InfoExtractor): provider = dash_stream_info['providers'][0] fragments = [{ 'url': resolve_dash_template( - provider['url'] + stream['initUrl'], 0, dash_stream_info['hashes']['0']) + provider['url'] + stream['initUrl'], 0, dash_stream_info['hashes']['0']), }] for idx in 
range(dash_stream_info['videoLength'] // dash_stream_info['chunkTime']): fragments.append({ 'url': resolve_dash_template( provider['url'] + stream['segmentUrl'], idx, - dash_stream_info['hashes'][compat_str(idx // 10 * 10)]) + dash_stream_info['hashes'][str(idx // 10 * 10)]), }) content_type = stream['contentType'] kind = content_type.split('/')[0] @@ -173,16 +170,16 @@ class UstreamIE(InfoExtractor): r'ustream\.vars\.offAirContentVideoIds=([^;]+);', webpage, 'content video IDs'), video_id) return self.playlist_result( - map(lambda u: self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream'), content_video_ids), + (self.url_result('http://www.ustream.tv/recorded/' + u, 'Ustream') for u in content_video_ids), video_id) params = self._download_json( - 'https://api.ustream.tv/videos/%s.json' % video_id, video_id) + f'https://api.ustream.tv/videos/{video_id}.json', video_id) error = params.get('error') if error: raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error), expected=True) + f'{self.IE_NAME} returned error: {error}', expected=True) video = params['video'] @@ -255,12 +252,12 @@ class UstreamChannelIE(InfoExtractor): channel_id = self._html_search_meta('ustream:channel_id', webpage) BASE = 'http://www.ustream.tv' - next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id + next_url = f'/ajax/socialstream/videos/{channel_id}/1.json' video_ids = [] while next_url: reply = self._download_json( - compat_urlparse.urljoin(BASE, next_url), display_id, - note='Downloading video information (next: %d)' % (len(video_ids) + 1)) + urllib.parse.urljoin(BASE, next_url), display_id, + note=f'Downloading video information (next: {len(video_ids) + 1})') video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data'])) next_url = reply['nextUrl'] diff --git a/yt_dlp/extractor/ustudio.py b/yt_dlp/extractor/ustudio.py index f6ce5b3..b5da88b 100644 --- a/yt_dlp/extractor/ustudio.py +++ b/yt_dlp/extractor/ustudio.py @@ -21,14 +21,14 @@ class 
UstudioIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20111107', 'uploader': 'Tony Farley', - } + }, } def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).groups() config = self._download_xml( - 'http://v1.ustudio.com/embed/%s/ustudio/config.xml' % video_id, + f'http://v1.ustudio.com/embed/{video_id}/ustudio/config.xml', display_id) def extract(kind): @@ -36,7 +36,7 @@ class UstudioIE(InfoExtractor): 'url': unescapeHTML(item.attrib['url']), 'width': int_or_none(item.get('width')), 'height': int_or_none(item.get('height')), - } for item in config.findall('./qualities/quality/%s' % kind) if item.get('url')] + } for item in config.findall(f'./qualities/quality/{kind}') if item.get('url')] formats = extract('video') @@ -74,13 +74,13 @@ class UstudioEmbedIE(InfoExtractor): 'title': '5 Things IT Should Know About Video', 'description': 'md5:93d32650884b500115e158c5677d25ad', 'uploader_id': 'DeN7VdYRDKhP', - } + }, } def _real_extract(self, url): uploader_id, video_id = self._match_valid_url(url).groups() video_data = self._download_json( - 'http://app.ustudio.com/embed/%s/%s/config.json' % (uploader_id, video_id), + f'http://app.ustudio.com/embed/{uploader_id}/{video_id}/config.json', video_id)['videos'][0] title = video_data['name'] @@ -92,7 +92,7 @@ class UstudioEmbedIE(InfoExtractor): continue height = int_or_none(quality.get('height')) formats.append({ - 'format_id': '%s-%dp' % (ext, height) if height else ext, + 'format_id': f'{ext}-{height}p' if height else ext, 'url': quality_url, 'width': int_or_none(quality.get('width')), 'height': height, diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py index 12a7e49..fc44df9 100644 --- a/yt_dlp/extractor/utreon.py +++ b/yt_dlp/extractor/utreon.py @@ -23,7 +23,7 @@ class UtreonIE(InfoExtractor): 'thumbnail': r're:^https?://.+\.jpg', 'release_date': '20210723', 'duration': 586, - } + }, }, { 'url': 'https://utreon.com/v/jerJw5EOOVU', 'info_dict': { @@ 
-35,7 +35,7 @@ class UtreonIE(InfoExtractor): 'thumbnail': r're:^https?://.+\.jpg', 'release_date': '20210723', 'duration': 60, - } + }, }, { 'url': 'https://utreon.com/v/C4ZxXhYBBmE', 'info_dict': { @@ -47,7 +47,7 @@ class UtreonIE(InfoExtractor): 'thumbnail': r're:^https?://.+\.jpg', 'release_date': '20210723', 'duration': 884, - } + }, }, { 'url': 'https://utreon.com/v/Y-stEH-FBm8', 'info_dict': { @@ -59,7 +59,7 @@ class UtreonIE(InfoExtractor): 'thumbnail': r're:^https?://.+\.jpg', 'release_date': '20210718', 'duration': 151, - } + }, }, { 'url': 'https://playeur.com/v/Wzqp-UrxSeu', 'info_dict': { @@ -71,7 +71,7 @@ class UtreonIE(InfoExtractor): 'release_date': '20240208', 'thumbnail': r're:^https?://.+\.jpg', 'duration': 262, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py index 205f8ea..9d3c39f 100644 --- a/yt_dlp/extractor/veo.py +++ b/yt_dlp/extractor/veo.py @@ -22,7 +22,7 @@ class VeoIE(InfoExtractor): 'timestamp': 1603847208, 'duration': 1916, 'view_count': int, - } + }, }, { 'url': 'https://app.veo.co/matches/20220313-2022-03-13_u15m-plsjq-vs-csl/', 'only_matching': True, @@ -32,10 +32,10 @@ class VeoIE(InfoExtractor): video_id = self._match_id(url) metadata = self._download_json( - 'https://app.veo.co/api/app/matches/%s' % video_id, video_id) + f'https://app.veo.co/api/app/matches/{video_id}', video_id) video_data = self._download_json( - 'https://app.veo.co/api/app/matches/%s/videos' % video_id, video_id, 'Downloading video data') + f'https://app.veo.co/api/app/matches/{video_id}/videos', video_id, 'Downloading video data') formats = [] for fmt in video_data: diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py index 92ff865..dc1bf96 100644 --- a/yt_dlp/extractor/veoh.py +++ b/yt_dlp/extractor/veoh.py @@ -80,7 +80,7 @@ class VeohIE(InfoExtractor): 'age_limit': 18, 'categories': ['technology_and_gaming', 'gaming'], 'tags': ['puzzle', 'of', 'flesh'], - } + }, }] def _real_extract(self, 
url): @@ -138,17 +138,17 @@ class VeohUserIE(VeohIE): # XXX: Do not subclass from concrete IE 'url': 'https://www.veoh.com/users/valentinazoe', 'info_dict': { 'id': 'valentinazoe', - 'title': 'valentinazoe (Uploads)' + 'title': 'valentinazoe (Uploads)', }, - 'playlist_mincount': 75 + 'playlist_mincount': 75, }, { 'url': 'https://www.veoh.com/users/PiensaLibre', 'info_dict': { 'id': 'PiensaLibre', - 'title': 'PiensaLibre (Uploads)' + 'title': 'PiensaLibre (Uploads)', }, - 'playlist_mincount': 2 + 'playlist_mincount': 2, }] _PAGE_SIZE = 16 @@ -159,14 +159,14 @@ class VeohUserIE(VeohIE): # XXX: Do not subclass from concrete IE note=f'Downloading videos page {page + 1}', headers={ 'x-csrf-token': self._TOKEN, - 'content-type': 'application/json;charset=UTF-8' + 'content-type': 'application/json;charset=UTF-8', }, data=json.dumps({ 'username': uploader, 'maxResults': self._PAGE_SIZE, 'page': page + 1, - 'requestName': 'userPage' - }).encode('utf-8')) + 'requestName': 'userPage', + }).encode()) if not response.get('success'): raise ExtractorError(response['message']) diff --git a/yt_dlp/extractor/vesti.py b/yt_dlp/extractor/vesti.py index a2e9022..844041a 100644 --- a/yt_dlp/extractor/vesti.py +++ b/yt_dlp/extractor/vesti.py @@ -94,7 +94,7 @@ class VestiIE(InfoExtractor): # rtmp download 'skip_download': True, }, - 'skip': 'Translation has finished' + 'skip': 'Translation has finished', }, ] @@ -109,7 +109,7 @@ class VestiIE(InfoExtractor): page) if mobj: video_id = mobj.group('id') - page = self._download_webpage('http://www.vesti.ru/only_video.html?vid=%s' % video_id, video_id, + page = self._download_webpage(f'http://www.vesti.ru/only_video.html?vid={video_id}', video_id, 'Downloading video page') rutv_url = RUTVIE._extract_url(page) diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py index 7715d68..8552a60 100644 --- a/yt_dlp/extractor/vevo.py +++ b/yt_dlp/extractor/vevo.py @@ -2,7 +2,6 @@ import json import re from .common import InfoExtractor -from 
..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -22,10 +21,10 @@ class VevoBaseIE(InfoExtractor): class VevoIE(VevoBaseIE): - ''' + """ Accepts urls from vevo.com or in the format 'vevo:{id}' (currently used by MTVIE and MySpaceIE) - ''' + """ _VALID_URL = r'''(?x) (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| @@ -166,14 +165,14 @@ class VevoIE(VevoBaseIE): data=json.dumps({ 'client_id': 'SPupX1tvqFEopQ1YS6SS', 'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous', - }).encode('utf-8'), + }).encode(), headers={ 'Content-Type': 'application/json', }) if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage): self.raise_geo_restricted( - '%s said: This page is currently unavailable in your region' % self.IE_NAME) + f'{self.IE_NAME} said: This page is currently unavailable in your region') auth_info = self._parse_json(webpage, video_id) self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token'] @@ -185,7 +184,7 @@ class VevoIE(VevoBaseIE): if isinstance(e.cause, HTTPError): errors = self._parse_json(e.cause.response.read().decode(), None)['errors'] error_message = ', '.join([error['message'] for error in errors]) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True) raise return data @@ -195,11 +194,11 @@ class VevoIE(VevoBaseIE): self._initialize_api(video_id) video_info = self._call_api( - 'video/%s' % video_id, video_id, 'Downloading api video info', + f'video/{video_id}', video_id, 'Downloading api video info', 'Failed to download video info') video_versions = self._call_api( - 'video/%s/streams' % video_id, video_id, + f'video/{video_id}/streams', video_id, 'Downloading video versions info', 'Failed to download video versions info', 
fatal=False) @@ -215,7 +214,7 @@ class VevoIE(VevoBaseIE): video_versions = [ value for key, value in json_data['apollo']['data'].items() - if key.startswith('%s.streams' % video_id)] + if key.startswith(f'{video_id}.streams')] uploader = None artist = None @@ -238,16 +237,16 @@ class VevoIE(VevoBaseIE): continue elif '.mpd' in version_url: formats.extend(self._extract_mpd_formats( - version_url, video_id, mpd_id='dash-%s' % version, - note='Downloading %s MPD information' % version, - errnote='Failed to download %s MPD information' % version, + version_url, video_id, mpd_id=f'dash-{version}', + note=f'Downloading {version} MPD information', + errnote=f'Failed to download {version} MPD information', fatal=False)) elif '.m3u8' in version_url: formats.extend(self._extract_m3u8_formats( version_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls-%s' % version, - note='Downloading %s m3u8 information' % version, - errnote='Failed to download %s m3u8 information' % version, + m3u8_id=f'hls-{version}', + note=f'Downloading {version} m3u8 information', + errnote=f'Failed to download {version} m3u8 information', fatal=False)) else: m = re.search(r'''(?xi) @@ -275,13 +274,13 @@ class VevoIE(VevoBaseIE): track = video_info['title'] if featured_artist: - artist = '%s ft. %s' % (artist, featured_artist) - title = '%s - %s' % (artist, track) if artist else track + artist = f'{artist} ft. 
{featured_artist}' + title = f'{artist} - {track}' if artist else track genres = video_info.get('genres') genre = ( genres[0] if genres and isinstance(genres, list) - and isinstance(genres[0], compat_str) else None) + and isinstance(genres[0], str) else None) is_explicit = video_info.get('isExplicit') if is_explicit is True: @@ -337,15 +336,15 @@ class VevoPlaylistIE(VevoBaseIE): r'<meta[^>]+content=(["\'])vevo://video/(?P<id>.+?)\1[^>]*>', webpage, 'video id', default=None, group='id') if video_id: - return self.url_result('vevo:%s' % video_id, VevoIE.ie_key()) + return self.url_result(f'vevo:{video_id}', VevoIE.ie_key()) - playlists = self._extract_json(webpage, playlist_id)['default']['%ss' % playlist_kind] + playlists = self._extract_json(webpage, playlist_id)['default'][f'{playlist_kind}s'] - playlist = (list(playlists.values())[0] + playlist = (next(iter(playlists.values())) if playlist_kind == 'playlist' else playlists[playlist_id]) entries = [ - self.url_result('vevo:%s' % src, VevoIE.ie_key()) + self.url_result(f'vevo:{src}', VevoIE.ie_key()) for src in playlist['isrcs']] return self.playlist_result( diff --git a/yt_dlp/extractor/vgtv.py b/yt_dlp/extractor/vgtv.py index db338fa..1eb2553 100644 --- a/yt_dlp/extractor/vgtv.py +++ b/yt_dlp/extractor/vgtv.py @@ -39,7 +39,7 @@ class VGTVIE(XstreamIE): # XXX: Do not subclass from concrete IE _VALID_URL = r'''(?x) (?:https?://(?:www\.)? (?P<host> - %s + {} ) /? 
(?: @@ -48,10 +48,10 @@ class VGTVIE(XstreamIE): # XXX: Do not subclass from concrete IE a(?:rticles)?/ )| (?P<appname> - %s + {} ):) (?P<id>\d+) - ''' % ('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys())) + '''.format('|'.join(_HOST_TO_APPNAME.keys()), '|'.join(_APP_NAME_TO_VENDOR.keys())) _TESTS = [ { @@ -174,13 +174,12 @@ class VGTVIE(XstreamIE): # XXX: Do not subclass from concrete IE vendor = self._APP_NAME_TO_VENDOR[appname] data = self._download_json( - 'http://svp.vg.no/svp/api/v1/%s/assets/%s?appName=%s-website' - % (vendor, video_id, appname), + f'http://svp.vg.no/svp/api/v1/{vendor}/assets/{video_id}?appName={appname}-website', video_id, 'Downloading media JSON') if data.get('status') == 'inactive': raise ExtractorError( - 'Video %s is no longer available' % video_id, expected=True) + f'Video {video_id} is no longer available', expected=True) info = { 'formats': [], @@ -203,7 +202,7 @@ class VGTVIE(XstreamIE): # XXX: Do not subclass from concrete IE if hds_url: hdcore_sign = 'hdcore=3.7.0' f4m_formats = self._extract_f4m_formats( - hds_url + '?%s' % hdcore_sign, video_id, f4m_id='hds', fatal=False) + hds_url + f'?{hdcore_sign}', video_id, f4m_id='hds', fatal=False) if f4m_formats: for entry in f4m_formats: # URLs without the extra param induce an 404 error @@ -225,7 +224,7 @@ class VGTVIE(XstreamIE): # XXX: Do not subclass from concrete IE 'width': int(mobj.group(1)), 'height': int(mobj.group(2)), 'tbr': tbr, - 'format_id': 'mp4-%s' % tbr, + 'format_id': f'mp4-{tbr}', }) formats.append(format_info) @@ -275,7 +274,7 @@ class BTArticleIE(InfoExtractor): webpage = self._download_webpage(url, self._match_id(url)) video_id = self._search_regex( r'<video[^>]+data-id="(\d+)"', webpage, 'video id') - return self.url_result('bttv:%s' % video_id, 'VGTV') + return self.url_result(f'bttv:{video_id}', 'VGTV') class BTVestlendingenIE(InfoExtractor): @@ -308,4 +307,4 @@ class BTVestlendingenIE(InfoExtractor): }] def _real_extract(self, url): - 
return self.url_result('bttv:%s' % self._match_id(url), 'VGTV') + return self.url_result(f'bttv:{self._match_id(url)}', 'VGTV') diff --git a/yt_dlp/extractor/vh1.py b/yt_dlp/extractor/vh1.py index 41b8a46..53d5a71 100644 --- a/yt_dlp/extractor/vh1.py +++ b/yt_dlp/extractor/vh1.py @@ -1,6 +1,6 @@ from .mtv import MTVServicesInfoExtractor -# TODO Remove - Reason: Outdated Site +# TODO: Remove - Reason: Outdated Site class VH1IE(MTVServicesInfoExtractor): diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py index b072d9d..3739a37 100644 --- a/yt_dlp/extractor/vice.py +++ b/yt_dlp/extractor/vice.py @@ -7,7 +7,6 @@ import time from .adobepass import AdobePassIE from .common import InfoExtractor from .youtube import YoutubeIE -from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -28,7 +27,7 @@ class ViceBaseIE(InfoExtractor): %s(locale: "%s", %s: "%s"%s) { %s } -}''' % (resource, locale, resource_key, resource_id, args, fields), +}''' % (resource, locale, resource_key, resource_id, args, fields), # noqa: UP031 })['data'][resource] @@ -127,7 +126,7 @@ class ViceIE(ViceBaseIE, AdobePassIE): query.update({ 'exp': exp, - 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(), + 'sign': hashlib.sha512(f'{video_id}:GET:{exp}'.encode()).hexdigest(), 'skipadstitching': 1, 'platform': 'desktop', 'rn': random.randint(10000, 100000), @@ -135,14 +134,13 @@ class ViceIE(ViceBaseIE, AdobePassIE): try: preplay = self._download_json( - 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id), + f'https://vms.vice.com/{locale}/video/preplay/{video_id}', video_id, query=query) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status in (400, 401): error = json.loads(e.cause.response.read().decode()) error_message = error.get('error_description') or error['details'] - raise ExtractorError('%s said: %s' % ( - self.IE_NAME, error_message), expected=True) + raise 
ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True) raise video_data = preplay['video'] @@ -157,7 +155,7 @@ class ViceIE(ViceBaseIE, AdobePassIE): cc_url = subtitle.get('url') if not cc_url: continue - language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], compat_str) or 'en' + language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], str) or 'en' subtitles.setdefault(language_code, []).append({ 'url': cc_url, }) @@ -171,7 +169,7 @@ class ViceIE(ViceBaseIE, AdobePassIE): 'duration': int_or_none(video_data.get('video_duration')), 'timestamp': int_or_none(video_data.get('created_at'), 1000), 'age_limit': parse_age_limit(video_data.get('video_rating') or rating), - 'series': try_get(video_data, lambda x: x['show']['base']['display_title'], compat_str), + 'series': try_get(video_data, lambda x: x['show']['base']['display_title'], str), 'episode_number': int_or_none(episode.get('episode_number')), 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')), 'season_number': int_or_none(season.get('season_number')), @@ -202,7 +200,7 @@ class ViceShowIE(ViceBaseIE): def _fetch_page(self, locale, show_id, page): videos = self._call_api('videos', 'show_id', show_id, locale, '''body id - url''', ', page: %d, per_page: %d' % (page + 1, self._PAGE_SIZE)) + url''', f', page: {page + 1}, per_page: {self._PAGE_SIZE}') for video in videos: yield self.url_result( video['url'], ViceIE.ie_key(), video.get('id')) diff --git a/yt_dlp/extractor/viddler.py b/yt_dlp/extractor/viddler.py index 4091477..60ebfbd 100644 --- a/yt_dlp/extractor/viddler.py +++ b/yt_dlp/extractor/viddler.py @@ -25,7 +25,7 @@ class ViddlerIE(InfoExtractor): 'view_count': int, 'comment_count': int, 'categories': ['video content', 'high quality video', 'video made easy', 'how to produce video with limited resources', 'viddler'], - } + }, }, { 'url': 'http://www.viddler.com/v/4d03aad9/', 'md5': 'f12c5a7fa839c47a79363bfdf69404fb', @@ 
-38,7 +38,7 @@ class ViddlerIE(InfoExtractor): 'timestamp': 1422285291, 'view_count': int, 'comment_count': int, - } + }, }, { 'url': 'http://www.viddler.com/player/221ebbbd/0/', 'md5': '740511f61d3d1bb71dc14a0fe01a1c10', @@ -52,7 +52,7 @@ class ViddlerIE(InfoExtractor): 'timestamp': 1411997190, 'view_count': int, 'comment_count': int, - } + }, }, { # secret protected 'url': 'http://www.viddler.com/v/890c0985?secret=34051570', diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py index 634d2ed..8dc7ebd 100644 --- a/yt_dlp/extractor/videa.py +++ b/yt_dlp/extractor/videa.py @@ -1,9 +1,10 @@ +import base64 import random import string import struct from .common import InfoExtractor -from ..compat import compat_b64decode, compat_ord +from ..compat import compat_ord from ..utils import ( ExtractorError, int_or_none, @@ -118,7 +119,7 @@ class VideaIE(InfoExtractor): l = nonce[:32] s = nonce[32:] result = '' - for i in range(0, 32): + for i in range(32): result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)] query = parse_qs(player_url) @@ -133,7 +134,7 @@ class VideaIE(InfoExtractor): else: key = result[16:] + random_seed + handle.headers['x-videa-xs'] info = self._parse_xml(self.rc4( - compat_b64decode(b64_info), key), video_id) + base64.b64decode(b64_info), key), video_id) video = xpath_element(info, './video', 'video') if video is None: diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py index 37bc7d7..6f98c68 100644 --- a/yt_dlp/extractor/videocampus_sachsen.py +++ b/yt_dlp/extractor/videocampus_sachsen.py @@ -70,11 +70,11 @@ class VideocampusSachsenIE(InfoExtractor): 'www.wenglor-media.com', 'www2.univ-sba.dz', ) - _VALID_URL = r'''(?x)https?://(?P<host>%s)/(?: + _VALID_URL = r'''(?x)https?://(?P<host>{})/(?: m/(?P<tmp_id>[0-9a-f]+)| - (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})| - media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{32}&?) 
- )''' % ('|'.join(map(re.escape, _INSTANCES))) + (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{{32}})| + media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{{32}}&?) + )'''.format('|'.join(map(re.escape, _INSTANCES))) _TESTS = [ { @@ -119,7 +119,7 @@ class VideocampusSachsenIE(InfoExtractor): 'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg', 'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122', 'ext': 'mp4', - } + }, }, { 'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c', @@ -187,10 +187,10 @@ class VideocampusSachsenIE(InfoExtractor): class ViMPPlaylistIE(InfoExtractor): IE_NAME = 'ViMP:Playlist' - _VALID_URL = r'''(?x)(?P<host>https?://(?:%s))/(?: + _VALID_URL = r'''(?x)(?P<host>https?://(?:{}))/(?: album/view/aid/(?P<album_id>[0-9]+)| (?P<mode>category|channel)/(?P<name>[\w-]+)/(?P<id>[0-9]+) - )''' % '|'.join(map(re.escape, VideocampusSachsenIE._INSTANCES)) + )'''.format('|'.join(map(re.escape, VideocampusSachsenIE._INSTANCES))) _TESTS = [{ 'url': 'https://vimp.oth-regensburg.de/channel/Designtheorie-1-SoSe-2020/3', @@ -216,9 +216,9 @@ class ViMPPlaylistIE(InfoExtractor): }] _PAGE_SIZE = 10 - def _fetch_page(self, host, url_part, id, data, page): + def _fetch_page(self, host, url_part, playlist_id, data, page): webpage = self._download_webpage( - f'{host}/media/ajax/component/boxList/{url_part}', id, + f'{host}/media/ajax/component/boxList/{url_part}', playlist_id, query={'page': page, 'page_only': 1}, data=urlencode_postdata(data)) urls = re.findall(r'"([^"]+/video/[^"]+)"', webpage) @@ -226,28 +226,28 @@ class ViMPPlaylistIE(InfoExtractor): yield self.url_result(host + url, VideocampusSachsenIE) def _real_extract(self, url): - host, album_id, mode, name, id = self._match_valid_url(url).group( + host, album_id, mode, name, playlist_id = self._match_valid_url(url).group( 'host', 'album_id', 
'mode', 'name', 'id') - webpage = self._download_webpage(url, album_id or id, fatal=False) or '' + webpage = self._download_webpage(url, album_id or playlist_id, fatal=False) or '' title = (self._html_search_meta('title', webpage, fatal=False) or self._html_extract_title(webpage)) url_part = (f'aid/{album_id}' if album_id - else f'category/{name}/category_id/{id}' if mode == 'category' - else f'title/{name}/channel/{id}') + else f'category/{name}/category_id/{playlist_id}' if mode == 'category' + else f'title/{name}/channel/{playlist_id}') mode = mode or 'album' data = { 'vars[mode]': mode, - f'vars[{mode}]': album_id or id, + f'vars[{mode}]': album_id or playlist_id, 'vars[context]': '4' if album_id else '1' if mode == 'category' else '3', - 'vars[context_id]': album_id or id, + 'vars[context_id]': album_id or playlist_id, 'vars[layout]': 'thumb', 'vars[per_page][thumb]': str(self._PAGE_SIZE), } return self.playlist_result( OnDemandPagedList(functools.partial( - self._fetch_page, host, url_part, album_id or id, data), self._PAGE_SIZE), - playlist_title=title, id=f'{mode}-{album_id or id}') + self._fetch_page, host, url_part, album_id or playlist_id, data), self._PAGE_SIZE), + playlist_title=title, id=f'{mode}-{album_id or playlist_id}') diff --git a/yt_dlp/extractor/videofyme.py b/yt_dlp/extractor/videofyme.py index f1f88c4..6cdda75 100644 --- a/yt_dlp/extractor/videofyme.py +++ b/yt_dlp/extractor/videofyme.py @@ -31,7 +31,7 @@ class VideofyMeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - config = self._download_json('http://vf-player-info-loader.herokuapp.com/%s.json' % video_id, video_id)['videoinfo'] + config = self._download_json(f'http://vf-player-info-loader.herokuapp.com/{video_id}.json', video_id)['videoinfo'] video = config.get('video') blog = config.get('blog', {}) diff --git a/yt_dlp/extractor/videoken.py b/yt_dlp/extractor/videoken.py index eaf0cc8..dc1dcf1 100644 --- a/yt_dlp/extractor/videoken.py +++ 
b/yt_dlp/extractor/videoken.py @@ -293,7 +293,7 @@ class VideoKenTopicIE(VideoKenBaseIE): 'playlist_mincount': 77, 'info_dict': { 'id': 'gravitational_waves', - 'title': 'gravitational waves' + 'title': 'gravitational waves', }, }, { 'url': 'https://videos.cncf.io/topic/prometheus/', diff --git a/yt_dlp/extractor/videomore.py b/yt_dlp/extractor/videomore.py index ddc33f7..c41d3d9 100644 --- a/yt_dlp/extractor/videomore.py +++ b/yt_dlp/extractor/videomore.py @@ -1,7 +1,4 @@ from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( int_or_none, parse_qs, @@ -253,7 +250,7 @@ class VideomoreVideoIE(VideomoreBaseIE): 'params': { 'skip_download': True, }, - 'skip': 'redirects to https://more.tv/' + 'skip': 'redirects to https://more.tv/', }, { 'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so', 'only_matching': True, @@ -264,7 +261,7 @@ class VideomoreVideoIE(VideomoreBaseIE): @classmethod def suitable(cls, url): - return False if VideomoreIE.suitable(url) else super(VideomoreVideoIE, cls).suitable(url) + return False if VideomoreIE.suitable(url) else super().suitable(url) def _real_extract(self, url): display_id = self._match_id(url) @@ -292,14 +289,14 @@ class VideomoreSeasonIE(VideomoreBaseIE): @classmethod def suitable(cls, url): return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url)) - else super(VideomoreSeasonIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): display_id = self._match_id(url) season = self._download_page_data(display_id) - season_id = compat_str(season['id']) + season_id = str(season['id']) tracks = self._download_json( - self._API_BASE_URL + 'seasons/%s/tracks' % season_id, + self._API_BASE_URL + f'seasons/{season_id}/tracks', season_id)['data'] entries = [] for track in tracks: diff --git a/yt_dlp/extractor/videopress.py b/yt_dlp/extractor/videopress.py index 0734aee..d3c9c8e 100644 --- a/yt_dlp/extractor/videopress.py +++ 
b/yt_dlp/extractor/videopress.py @@ -14,7 +14,7 @@ from ..utils import ( class VideoPressIE(InfoExtractor): _ID_REGEX = r'[\da-zA-Z]{8}' _PATH_REGEX = r'video(?:\.word)?press\.com/embed/' - _VALID_URL = r'https?://%s(?P<id>%s)' % (_PATH_REGEX, _ID_REGEX) + _VALID_URL = rf'https?://{_PATH_REGEX}(?P<id>{_ID_REGEX})' _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>(?:https?://)?{_PATH_REGEX}{_ID_REGEX})'] _TESTS = [{ 'url': 'https://videopress.com/embed/kUJmAcSf', @@ -44,7 +44,7 @@ class VideoPressIE(InfoExtractor): query = random_birthday('birth_year', 'birth_month', 'birth_day') query['fields'] = 'description,duration,file_url_base,files,height,original,poster,rating,title,upload_date,width' video = self._download_json( - 'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id, + f'https://public-api.wordpress.com/rest/v1.1/videos/{video_id}', video_id, query=query) title = video['title'] @@ -63,7 +63,7 @@ class VideoPressIE(InfoExtractor): if ext in ('mp4', 'ogg'): formats.append({ 'url': urljoin(base_url, path), - 'format_id': '%s-%s' % (format_id, ext), + 'format_id': f'{format_id}-{ext}', 'ext': determine_ext(path, ext), 'quality': quality(format_id), }) diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 6322bb0..955a116 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -31,7 +31,7 @@ class VidioBaseIE(InfoExtractor): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading log in page') - login_form = self._form_hidden_inputs("login-form", login_page) + login_form = self._form_hidden_inputs('login-form', login_page) login_form.update({ 'user[login]': username, 'user[password]': password, @@ -52,7 +52,7 @@ class VidioBaseIE(InfoExtractor): elif reason: subreason = get_element_by_class('onboarding-modal__description-text', login_post) or '' raise ExtractorError( - 'Unable to log in: %s. %s' % (reason, clean_html(subreason)), expected=True) + f'Unable to log in: {reason}. 
{clean_html(subreason)}', expected=True) raise ExtractorError('Unable to log in') def _initialize_pre_login(self): @@ -98,7 +98,7 @@ class VidioIE(VidioBaseIE): }, { # Premier-exclusive video 'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon', - 'only_matching': True + 'only_matching': True, }, { # embed url from https://enamplus.liputan6.com/read/5033648/video-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah 'url': 'https://www.vidio.com/embed/7115874-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah', @@ -135,7 +135,7 @@ class VidioIE(VidioBaseIE): if is_premium: sources = self._download_json( - 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=videos' % video_id, + f'https://www.vidio.com/interactions_stream.json?video_id={video_id}&type=videos', display_id, note='Downloading premier API JSON') if not (sources.get('source') or sources.get('source_dash')): self.raise_login_required('This video is only available for registered users with the appropriate subscription') @@ -199,7 +199,7 @@ class VidioPremierIE(VidioBaseIE): def _playlist_entries(self, playlist_url, display_id): index = 1 while playlist_url: - playlist_json = self._call_api(playlist_url, display_id, 'Downloading API JSON page %s' % index) + playlist_json = self._call_api(playlist_url, display_id, f'Downloading API JSON page {index}') for video_json in playlist_json.get('data', []): link = video_json['links']['watchpage'] yield self.url_result(link, 'Vidio', video_json['id']) @@ -217,14 +217,14 @@ class VidioPremierIE(VidioBaseIE): self._playlist_entries(playlist_url, playlist_id), playlist_id=playlist_id, playlist_title=idata.get('title')) - playlist_data = self._call_api('https://api.vidio.com/content_profiles/%s/playlists' % playlist_id, display_id) + playlist_data = self._call_api(f'https://api.vidio.com/content_profiles/{playlist_id}/playlists', display_id) return self.playlist_from_matches( playlist_data.get('data', []), playlist_id=playlist_id, ie=self.ie_key(), 
getter=lambda data: smuggle_url(url, { 'url': data['relationships']['videos']['links']['related'], 'id': data['id'], - 'title': try_get(data, lambda x: x['attributes']['name']) + 'title': try_get(data, lambda x: x['attributes']['name']), })) @@ -252,7 +252,7 @@ class VidioLiveIE(VidioBaseIE): def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).groups() stream_data = self._call_api( - 'https://www.vidio.com/api/livestreamings/%s/detail' % video_id, display_id) + f'https://www.vidio.com/api/livestreamings/{video_id}/detail', display_id) stream_meta = stream_data['livestreamings'][0] user = stream_data.get('users', [{}])[0] @@ -265,14 +265,14 @@ class VidioLiveIE(VidioBaseIE): self.report_drm(video_id) if stream_meta.get('is_premium'): sources = self._download_json( - 'https://www.vidio.com/interactions_stream.json?video_id=%s&type=livestreamings' % video_id, + f'https://www.vidio.com/interactions_stream.json?video_id={video_id}&type=livestreamings', display_id, note='Downloading premier API JSON') if not (sources.get('source') or sources.get('source_dash')): self.raise_login_required('This video is only available for registered users with the appropriate subscription') if str_or_none(sources.get('source')): token_json = self._download_json( - 'https://www.vidio.com/live/%s/tokens' % video_id, + f'https://www.vidio.com/live/{video_id}/tokens', display_id, note='Downloading HLS token JSON', data=b'') formats.extend(self._extract_m3u8_formats( sources['source'] + '?' + token_json.get('token', ''), display_id, 'mp4', 'm3u8_native')) @@ -281,7 +281,7 @@ class VidioLiveIE(VidioBaseIE): else: if stream_meta.get('stream_token_url'): token_json = self._download_json( - 'https://www.vidio.com/live/%s/tokens' % video_id, + f'https://www.vidio.com/live/{video_id}/tokens', display_id, note='Downloading HLS token JSON', data=b'') formats.extend(self._extract_m3u8_formats( stream_meta['stream_token_url'] + '?' 
+ token_json.get('token', ''), diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index e1219a8..d9e33ca 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -34,7 +34,7 @@ class VidLiiIE(InfoExtractor): 'average_rating': float, 'categories': ['News & Politics'], 'tags': ['Vidlii', 'Jan', 'Videogames'], - } + }, }, { 'url': 'https://www.vidlii.com/watch?v=zTAtaAgOLKt', 'md5': '5778f7366aa4c569b77002f8bf6b614f', @@ -63,7 +63,7 @@ class VidLiiIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://www.vidlii.com/watch?v=%s' % video_id, video_id) + f'https://www.vidlii.com/watch?v={video_id}', video_id) formats = [] sources = [source[1] for source in re.findall( diff --git a/yt_dlp/extractor/vidly.py b/yt_dlp/extractor/vidly.py index 49a1960..60698f0 100644 --- a/yt_dlp/extractor/vidly.py +++ b/yt_dlp/extractor/vidly.py @@ -45,7 +45,7 @@ class VidlyIE(InfoExtractor): 'ext': 'mp4', 'title': 'w8p5b0', 'thumbnail': r're:https://\w+\.cloudfront\.net/', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py index c5d65cd..4a7ba98 100644 --- a/yt_dlp/extractor/viewlift.py +++ b/yt_dlp/extractor/viewlift.py @@ -5,6 +5,7 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, int_or_none, + join_nonempty, parse_age_limit, traverse_obj, ) @@ -62,8 +63,8 @@ class ViewLiftBaseIE(InfoExtractor): class ViewLiftEmbedIE(ViewLiftBaseIE): IE_NAME = 'viewlift:embed' - _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?P<domain>%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX - _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX] + _VALID_URL = 
rf'https?://(?:(?:www|embed)\.)?(?P<domain>{ViewLiftBaseIE._DOMAINS_REGEX})/embed/player\?.*\bfilmId=(?P<id>[\da-f]{{8}}-(?:[\da-f]{{4}}-){{3}}[\da-f]{{12}})' + _EMBED_REGEX = [rf'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:{ViewLiftBaseIE._DOMAINS_REGEX})/embed/player.+?)\1'] _TESTS = [{ 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500', 'md5': '2924e9215c6eff7a55ed35b72276bd93', @@ -74,7 +75,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): 'description': 'md5:b542bef32a6f657dadd0df06e26fb0c8', 'timestamp': 1334350096, 'upload_date': '20120413', - } + }, }, { # invalid labels, 360p is better that 480p 'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036', @@ -98,7 +99,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): content_data = self._call_api( site, 'entitlement/video/status', film_id, url, { - 'id': film_id + 'id': film_id, })['video'] gist = content_data['gist'] title = gist['title'] @@ -120,7 +121,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): 'height', default=None)) formats.append({ 'url': video_asset_url, - 'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''), + 'format_id': join_nonempty('http', bitrate), 'tbr': bitrate, 'height': height, 'vcodec': video_asset.get('codec'), @@ -153,7 +154,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): class ViewLiftIE(ViewLiftBaseIE): IE_NAME = 'viewlift' _API_BASE = 'https://prod-api-cached-2.viewlift.com/' - _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX + _VALID_URL = rf'https?://(?:www\.)?(?P<domain>{ViewLiftBaseIE._DOMAINS_REGEX})(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' _TESTS = [{ 'url': 'http://www.snagfilms.com/films/title/lost_for_life', 'md5': '19844f897b35af219773fd63bdec2942', @@ -169,7 +170,7 @@ class ViewLiftIE(ViewLiftBaseIE): 'age_limit': 14, 'upload_date': '20150421', 
'timestamp': 1429656820, - } + }, }, { 'url': 'http://www.snagfilms.com/show/the_world_cut_project/india', 'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd', @@ -183,7 +184,7 @@ class ViewLiftIE(ViewLiftBaseIE): 'duration': 979, 'timestamp': 1399478279, 'upload_date': '20140507', - } + }, }, { 'url': 'http://main.snagfilms.com/augie_alone/s_2_ep_12_love', 'info_dict': { @@ -253,7 +254,7 @@ class ViewLiftIE(ViewLiftBaseIE): 'description': 'md5:ca30a682b4528d02a3eb6d0427dd0f87', 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20210830', - 'series': 'Case Jaundice' + 'series': 'Case Jaundice', }, 'params': {'skip_download': True}, }, { # Free video @@ -265,7 +266,7 @@ class ViewLiftIE(ViewLiftBaseIE): 'description': 'md5:9d21edc1827d32f8633eb67c2054fc31', 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20211006', - 'series': 'Six (Hindi)' + 'series': 'Six (Hindi)', }, 'params': {'skip_download': True}, }, { # Free episode @@ -277,7 +278,7 @@ class ViewLiftIE(ViewLiftBaseIE): 'description': 'md5:ef6ffae01a3d83438597367400f824ed', 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20211004', - 'series': 'Asian Paints Moner Thikana' + 'series': 'Asian Paints Moner Thikana', }, 'params': {'skip_download': True}, }, { # Free series @@ -294,7 +295,7 @@ class ViewLiftIE(ViewLiftBaseIE): }, }, { # Premium movie 'url': 'https://www.hoichoi.tv/movies/detective-2020', - 'only_matching': True + 'only_matching': True, }, { # Chorki Premium series 'url': 'https://www.chorki.com/bn/series/sinpaat', 'playlist_mincount': 7, @@ -326,7 +327,7 @@ class ViewLiftIE(ViewLiftBaseIE): @classmethod def suitable(cls, url): - return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url) + return False if ViewLiftEmbedIE.suitable(url) else super().suitable(url) def _show_entries(self, domain, seasons): for season in seasons: @@ -355,7 +356,7 @@ class ViewLiftIE(ViewLiftBaseIE): film_id = next(m['contentData'][0]['gist']['id'] for m in modules if 
m.get('moduleType') == 'VideoDetailModule') return { '_type': 'url_transparent', - 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id), + 'url': f'http://{domain}/embed/player?filmId={film_id}', 'id': film_id, 'display_id': display_id, 'ie_key': 'ViewLiftEmbed', diff --git a/yt_dlp/extractor/viidea.py b/yt_dlp/extractor/viidea.py index 649ffe3..2f04e88 100644 --- a/yt_dlp/extractor/viidea.py +++ b/yt_dlp/extractor/viidea.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -124,13 +121,13 @@ class ViideaIE(InfoExtractor): r'cfg\s*:\s*({[^}]+})'], webpage, 'cfg'), lecture_slug, js_to_json) - lecture_id = compat_str(cfg['obj_id']) + lecture_id = str(cfg['obj_id']) base_url = self._proto_relative_url(cfg['livepipe'], 'http:') try: lecture_data = self._download_json( - '%s/site/api/lecture/%s?format=json' % (base_url, lecture_id), + f'{base_url}/site/api/lecture/{lecture_id}?format=json', lecture_id)['lecture'][0] except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 403: @@ -150,18 +147,18 @@ class ViideaIE(InfoExtractor): playlist_entries = [] lecture_type = lecture_data.get('type') - parts = [compat_str(video) for video in cfg.get('videos', [])] + parts = [str(video) for video in cfg.get('videos', [])] if parts: multipart = len(parts) > 1 def extract_part(part_id): - smil_url = '%s/%s/video/%s/smil.xml' % (base_url, lecture_slug, part_id) + smil_url = f'{base_url}/{lecture_slug}/video/{part_id}/smil.xml' smil = self._download_smil(smil_url, lecture_id) info = self._parse_smil(smil, smil_url, lecture_id) - info['id'] = lecture_id if not multipart else '%s_part%s' % (lecture_id, part_id) - info['display_id'] = lecture_slug if not multipart else '%s_part%s' % (lecture_slug, part_id) + info['id'] = lecture_id if not multipart else 
f'{lecture_id}_part{part_id}' + info['display_id'] = lecture_slug if not multipart else f'{lecture_slug}_part{part_id}' if multipart: - info['title'] += ' (Part %s)' % part_id + info['title'] += f' (Part {part_id})' switch = smil.find('.//switch') if switch is not None: info['duration'] = parse_duration(switch.attrib.get('dur')) @@ -187,9 +184,9 @@ class ViideaIE(InfoExtractor): # It's probably a playlist if not parts or lecture_type == 'evt': playlist_webpage = self._download_webpage( - '%s/site/ajax/drilldown/?id=%s' % (base_url, lecture_id), lecture_id) + f'{base_url}/site/ajax/drilldown/?id={lecture_id}', lecture_id) entries = [ - self.url_result(compat_urlparse.urljoin(url, video_url), 'Viidea') + self.url_result(urllib.parse.urljoin(url, video_url), 'Viidea') for _, video_url in re.findall( r'<a[^>]+href=(["\'])(.+?)\1[^>]+id=["\']lec=\d+', playlist_webpage)] playlist_entries.extend(entries) diff --git a/yt_dlp/extractor/viki.py b/yt_dlp/extractor/viki.py index 3246dab..75f9cdf 100644 --- a/yt_dlp/extractor/viki.py +++ b/yt_dlp/extractor/viki.py @@ -43,14 +43,14 @@ class VikiBaseIE(InfoExtractor): 'X-Viki-as-id': '100005a-1625321982-3932', 'timestamp': str(timestamp), 'signature': str(sig), - 'x-viki-app-ver': self._APP_VERSION + 'x-viki-app-ver': self._APP_VERSION, } def _api_query(self, path, version=4, **kwargs): path += '?' if '?' 
not in path else '&' query = f'/v{version}/{path}app={self._APP}' if self._token: - query += '&token=%s' % self._token + query += f'&token={self._token}' return query + ''.join(f'&{name}={val}' for name, val in kwargs.items()) def _sign_query(self, path): @@ -68,7 +68,7 @@ class VikiBaseIE(InfoExtractor): url = self._API_URL_TEMPLATE % self._api_query(path, version=4) resp = self._download_json( url, video_id, note, fatal=fatal, query=query, - data=json.dumps(data).encode('utf-8') if data else None, + data=json.dumps(data).encode() if data else None, headers=({'x-viki-app-ver': self._APP_VERSION} if data else self._stream_headers(timestamp, sig) if query is None else None), expected_status=400) or {} @@ -79,7 +79,7 @@ class VikiBaseIE(InfoExtractor): def _raise_error(self, error, fatal=True): if error is None: return - msg = '%s said: %s' % (self.IE_NAME, error) + msg = f'{self.IE_NAME} said: {error}' if fatal: raise ExtractorError(msg, expected=True) else: @@ -113,7 +113,7 @@ class VikiBaseIE(InfoExtractor): class VikiIE(VikiBaseIE): IE_NAME = 'viki' - _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE + _VALID_URL = rf'{VikiBaseIE._VALID_URL_BASE}(?:videos|player)/(?P<id>[0-9]+v)' _TESTS = [{ 'note': 'Free non-DRM video with storyboards in MPD', 'url': 'https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1', @@ -233,10 +233,10 @@ class VikiIE(VikiBaseIE): title = try_get(video, lambda x: x['titles']['en'], str) episode_number = int_or_none(video.get('number')) if not title: - title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id + title = f'Episode {episode_number}' if video.get('type') == 'episode' else video.get('id') or video_id container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {} container_title = self.dict_selection(container_titles, 'en') - title = '%s - %s' % (container_title, title) + title = f'{container_title} - {title}' 
thumbnails = [{ 'id': thumbnail_id, @@ -244,15 +244,15 @@ class VikiIE(VikiBaseIE): } for thumbnail_id, thumbnail in (video.get('images') or {}).items() if thumbnail.get('url')] resp = self._call_api( - 'playback_streams/%s.json?drms=dt3&device_id=%s' % (video_id, self._DEVICE_ID), + f'playback_streams/{video_id}.json?drms=dt3&device_id={self._DEVICE_ID}', video_id, 'Downloading video streams JSON')['main'][0] stream_id = try_get(resp, lambda x: x['properties']['track']['stream_id']) subtitles = dict((lang, [{ 'ext': ext, 'url': self._API_URL_TEMPLATE % self._api_query( - f'videos/{video_id}/auth_subtitles/{lang}.{ext}', stream_id=stream_id) - } for ext in ('srt', 'vtt')]) for lang in (video.get('subtitle_completions') or {}).keys()) + f'videos/{video_id}/auth_subtitles/{lang}.{ext}', stream_id=stream_id), + } for ext in ('srt', 'vtt')]) for lang in (video.get('subtitle_completions') or {})) mpd_url = resp['url'] # 720p is hidden in another MPD which can be found in the current manifest content @@ -283,7 +283,7 @@ class VikiIE(VikiBaseIE): class VikiChannelIE(VikiBaseIE): IE_NAME = 'viki:channel' - _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE + _VALID_URL = rf'{VikiBaseIE._VALID_URL_BASE}(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' _TESTS = [{ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers', 'info_dict': { @@ -317,7 +317,7 @@ class VikiChannelIE(VikiBaseIE): def _entries(self, channel_id): params = { 'app': self._APP, 'token': self._token, 'only_ids': 'true', - 'direction': 'asc', 'sort': 'number', 'per_page': 30 + 'direction': 'asc', 'sort': 'number', 'per_page': 30, } video_types = self._configuration_arg('video_types') or self._video_types for video_type in video_types: @@ -329,7 +329,7 @@ class VikiChannelIE(VikiBaseIE): params['page'] = page_num res = self._call_api( f'containers/{channel_id}/{video_type}.json', channel_id, query=params, fatal=False, - note='Downloading %s JSON page %d' % (video_type.title(), 
page_num)) + note=f'Downloading {video_type.title()} JSON page {page_num}') for video_id in res.get('response') or []: yield self.url_result(f'https://www.viki.com/videos/{video_id}', VikiIE.ie_key(), video_id) @@ -338,7 +338,7 @@ class VikiChannelIE(VikiBaseIE): def _real_extract(self, url): channel_id = self._match_id(url) - channel = self._call_api('containers/%s.json' % channel_id, channel_id, 'Downloading channel JSON') + channel = self._call_api(f'containers/{channel_id}.json', channel_id, 'Downloading channel JSON') self._check_errors(channel) return self.playlist_result( self._entries(channel_id), channel_id, diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index ac96ade..a4ab7e2 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -2,9 +2,9 @@ import base64 import functools import itertools import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_str, compat_urlparse from ..networking import HEADRequest, Request from ..networking.exceptions import HTTPError from ..utils import ( @@ -141,7 +141,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): continue formats.append({ 'url': video_url, - 'format_id': 'http-%s' % f.get('quality'), + 'format_id': 'http-{}'.format(f.get('quality')), 'source_preference': 10, 'width': int_or_none(f.get('width')), 'height': int_or_none(f.get('height')), @@ -156,19 +156,19 @@ class VimeoBaseInfoExtractor(InfoExtractor): manifest_url = cdn_data.get('url') if not manifest_url: continue - format_id = '%s-%s' % (files_type, cdn_name) + format_id = f'{files_type}-{cdn_name}' sep_manifest_urls = [] if re.search(sep_pattern, manifest_url): for suffix, repl in (('', 'video'), ('_sep', 'sep/video')): sep_manifest_urls.append((format_id + suffix, re.sub( - sep_pattern, '/%s/' % repl, manifest_url))) + sep_pattern, f'/{repl}/', manifest_url))) else: sep_manifest_urls = [(format_id, manifest_url)] for f_id, m_url in sep_manifest_urls: if files_type == 'hls': fmts, 
subs = self._extract_m3u8_formats_and_subtitles( m_url, video_id, 'mp4', live=is_live, m3u8_id=f_id, - note='Downloading %s m3u8 information' % cdn_name, + note=f'Downloading {cdn_name} m3u8 information', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) @@ -179,7 +179,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): m_url = real_m_url fmts, subs = self._extract_mpd_formats_and_subtitles( m_url.replace('/master.json', '/master.mpd'), video_id, f_id, - 'Downloading %s MPD information' % cdn_name, + f'Downloading {cdn_name} MPD information', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) @@ -256,10 +256,10 @@ class VimeoBaseInfoExtractor(InfoExtractor): download_url = try_get(source_file, lambda x: x['download_url']) if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'): source_name = source_file.get('public_name', 'Original') - if self._is_valid_url(download_url, video_id, '%s video' % source_name): + if self._is_valid_url(download_url, video_id, f'{source_name} video'): ext = (try_get( source_file, lambda x: x['extension'], - compat_str) or determine_ext( + str) or determine_ext( download_url, None) or 'mp4').lower() return { 'url': download_url, @@ -275,7 +275,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'https://vimeo.com/_rv/viewer', video_id, note='Downloading jwt token', fatal=False) or {} if not jwt_response.get('jwt'): return - headers = {'Authorization': 'jwt %s' % jwt_response['jwt'], 'Accept': 'application/json'} + headers = {'Authorization': 'jwt {}'.format(jwt_response['jwt']), 'Accept': 'application/json'} original_response = self._download_json( f'https://api.vimeo.com/videos/{video_id}', video_id, headers=headers, fatal=False, expected_status=(403, 404)) or {} @@ -361,7 +361,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'params': { 'format': 'best[protocol=https]', }, - 'skip': 'No longer available' + 'skip': 'No longer available', }, { 'url': 
'http://player.vimeo.com/video/54469442', @@ -739,7 +739,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'Content-Type': 'application/x-www-form-urlencoded', }) checked = self._download_json( - f'{compat_urlparse.urlsplit(url)._replace(query=None).geturl()}/check-password', + f'{urllib.parse.urlsplit(url)._replace(query=None).geturl()}/check-password', video_id, 'Verifying the password', data=data, headers=headers) if checked is False: raise ExtractorError('Wrong video password', expected=True) @@ -748,7 +748,7 @@ class VimeoIE(VimeoBaseInfoExtractor): def _extract_from_api(self, video_id, unlisted_hash=None): token = self._download_json( 'https://vimeo.com/_rv/jwt', video_id, headers={ - 'X-Requested-With': 'XMLHttpRequest' + 'X-Requested-With': 'XMLHttpRequest', })['token'] api_url = 'https://api.vimeo.com/videos/' + video_id if unlisted_hash: @@ -802,7 +802,7 @@ class VimeoIE(VimeoBaseInfoExtractor): self._set_vimeo_cookie('vuid', viewer['vuid']) try: self._download_json( - 'https://vimeo.com/showcase/%s/auth' % album_id, + f'https://vimeo.com/showcase/{album_id}/auth', album_id, 'Verifying the password', data=urlencode_postdata({ 'password': password, 'token': viewer['xsrft'], @@ -829,21 +829,33 @@ class VimeoIE(VimeoBaseInfoExtractor): url = 'https://vimeo.com/' + video_id self._try_album_password(url) + is_secure = urllib.parse.urlparse(url).scheme == 'https' try: # Retrieve video webpage to extract further information webpage, urlh = self._download_webpage_handle( - url, video_id, headers=headers) + url, video_id, headers=headers, impersonate=is_secure) redirect_url = urlh.url - except ExtractorError as ee: - if isinstance(ee.cause, HTTPError) and ee.cause.status == 403: - errmsg = ee.cause.response.read() - if b'Because of its privacy settings, this video cannot be played here' in errmsg: - raise ExtractorError( - 'Cannot download embed-only video without embedding ' - 'URL. 
Please call yt-dlp with the URL of the page ' - 'that embeds this video.', - expected=True) - raise + except ExtractorError as error: + if not isinstance(error.cause, HTTPError) or error.cause.status not in (403, 429): + raise + errmsg = error.cause.response.read() + if b'Because of its privacy settings, this video cannot be played here' in errmsg: + raise ExtractorError( + 'Cannot download embed-only video without embedding URL. Please call yt-dlp ' + 'with the URL of the page that embeds this video.', expected=True) + # 403 == vimeo.com TLS fingerprint or DC IP block; 429 == player.vimeo.com TLS FP block + status = error.cause.status + dcip_msg = 'If you are using a data center IP or VPN/proxy, your IP may be blocked' + if target := error.cause.response.extensions.get('impersonate'): + raise ExtractorError( + f'Got HTTP Error {status} when using impersonate target "{target}". {dcip_msg}') + elif not is_secure: + raise ExtractorError(f'Got HTTP Error {status}. {dcip_msg}', expected=True) + raise ExtractorError( + 'This request has been blocked due to its TLS fingerprint. Install a ' + 'required impersonation dependency if possible, or else if you are okay with ' + f'{self._downloader._format_err("compromising your security/cookies", "light red")}, ' + f'try replacing "https:" with "http:" in the input URL. 
{dcip_msg}.', expected=True) if '://player.vimeo.com/video/' in url: config = self._search_json( @@ -864,7 +876,7 @@ class VimeoIE(VimeoBaseInfoExtractor): seed_status = vimeo_config.get('seed_status') or {} if seed_status.get('state') == 'failed': raise ExtractorError( - '%s said: %s' % (self.IE_NAME, seed_status['title']), + '{} said: {}'.format(self.IE_NAME, seed_status['title']), expected=True) cc_license = None @@ -916,7 +928,7 @@ class VimeoIE(VimeoBaseInfoExtractor): feature_id = vod.get('feature_id') if feature_id and not data.get('force_feature_id', False): return self.url_result(smuggle_url( - 'https://player.vimeo.com/player/%s' % feature_id, + f'https://player.vimeo.com/player/{feature_id}', {'force_feature_id': True}), 'Vimeo') if not video_description: @@ -1051,7 +1063,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s' def _page_url(self, base_url, pagenum): - return '%s/videos/page:%d/' % (base_url, pagenum) + return f'{base_url}/videos/page:{pagenum}/' def _extract_list_title(self, webpage): return self._TITLE or self._html_search_regex( @@ -1062,7 +1074,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): page_url = self._page_url(base_url, pagenum) webpage = self._download_webpage( page_url, list_id, - 'Downloading page %s' % pagenum) + f'Downloading page {pagenum}') if pagenum == 1: yield self._extract_list_title(webpage) @@ -1074,13 +1086,13 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): if clips: for video_id, video_url, video_title in clips: yield self.url_result( - compat_urlparse.urljoin(base_url, video_url), + urllib.parse.urljoin(base_url, video_url), VimeoIE.ie_key(), video_id=video_id, video_title=video_title) # More relaxed fallback else: for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): yield self.url_result( - 'https://vimeo.com/%s' % video_id, + f'https://vimeo.com/{video_id}', VimeoIE.ie_key(), video_id=video_id) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) 
is None: @@ -1135,7 +1147,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor): 'playlist_count': 1, 'params': { 'videopassword': 'youtube-dl', - } + }, }] _PAGE_SIZE = 100 @@ -1150,8 +1162,8 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor): query['_hashed_pass'] = hashed_pass try: videos = self._download_json( - 'https://api.vimeo.com/albums/%s/videos' % album_id, - album_id, 'Downloading page %d' % api_page, query=query, headers={ + f'https://api.vimeo.com/albums/{album_id}/videos', + album_id, f'Downloading page {api_page}', query=query, headers={ 'Authorization': 'jwt ' + authorization, 'Accept': 'application/json', })['data'] @@ -1191,7 +1203,7 @@ class VimeoAlbumIE(VimeoBaseInfoExtractor): self._set_vimeo_cookie('vuid', viewer['vuid']) try: hashed_pass = self._download_json( - 'https://vimeo.com/showcase/%s/auth' % album_id, + f'https://vimeo.com/showcase/{album_id}/auth', album_id, 'Verifying the password', data=urlencode_postdata({ 'password': password, 'token': viewer['xsrft'], @@ -1311,7 +1323,7 @@ class VimeoWatchLaterIE(VimeoChannelIE): # XXX: Do not subclass from concrete I }] def _page_url(self, base_url, pagenum): - url = '%s/page:%d/' % (base_url, pagenum) + url = f'{base_url}/page:{pagenum}/' request = Request(url) # Set the header to get a partial html page with the ids, # the normal page doesn't contain them. 
@@ -1339,11 +1351,11 @@ class VimeoLikesIE(VimeoChannelIE): # XXX: Do not subclass from concrete IE }] def _page_url(self, base_url, pagenum): - return '%s/page:%d/' % (base_url, pagenum) + return f'{base_url}/page:{pagenum}/' def _real_extract(self, url): user_id = self._match_id(url) - return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id) + return self._extract_videos(user_id, f'https://vimeo.com/{user_id}/likes') class VHXEmbedIE(VimeoBaseInfoExtractor): diff --git a/yt_dlp/extractor/vine.py b/yt_dlp/extractor/vine.py index 1909980..eed4bfe 100644 --- a/yt_dlp/extractor/vine.py +++ b/yt_dlp/extractor/vine.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( determine_ext, format_field, @@ -62,11 +61,11 @@ class VineIE(InfoExtractor): video_id = self._match_id(url) data = self._download_json( - 'https://archive.vine.co/posts/%s.json' % video_id, video_id) + f'https://archive.vine.co/posts/{video_id}.json', video_id) def video_url(kind): for url_suffix in ('Url', 'URL'): - format_url = data.get('video%s%s' % (kind, url_suffix)) + format_url = data.get(f'video{kind}{url_suffix}') if format_url: return format_url @@ -126,14 +125,14 @@ class VineUserIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if VineIE.suitable(url) else super(VineUserIE, cls).suitable(url) + return False if VineIE.suitable(url) else super().suitable(url) def _real_extract(self, url): mobj = self._match_valid_url(url) user = mobj.group('user') u = mobj.group('u') - profile_url = '%sapi/users/profiles/%s%s' % ( + profile_url = '{}api/users/profiles/{}{}'.format( self._VINE_BASE_URL, 'vanity/' if not u else '', user) profile_data = self._download_json( profile_url, user, note='Downloading user profile data') @@ -141,11 +140,11 @@ class VineUserIE(InfoExtractor): data = profile_data['data'] user_id = data.get('userId') or data['userIdStr'] profile = self._download_json( - 
'https://archive.vine.co/profiles/%s.json' % user_id, user_id) + f'https://archive.vine.co/profiles/{user_id}.json', user_id) entries = [ self.url_result( - 'https://vine.co/v/%s' % post_id, ie='Vine', video_id=post_id) + f'https://vine.co/v/{post_id}', ie='Vine', video_id=post_id) for post_id in profile['posts'] - if post_id and isinstance(post_id, compat_str)] + if post_id and isinstance(post_id, str)] return self.playlist_result( entries, user, profile.get('username'), profile.get('description')) diff --git a/yt_dlp/extractor/viously.py b/yt_dlp/extractor/viously.py index 9ec7ed3..3e8519c 100644 --- a/yt_dlp/extractor/viously.py +++ b/yt_dlp/extractor/viously.py @@ -25,7 +25,7 @@ class ViouslyIE(InfoExtractor): 'timestamp': 1680037507, 'duration': 3716, 'categories': ['motors'], - } + }, }] def _extract_from_webpage(self, url, webpage): diff --git a/yt_dlp/extractor/viqeo.py b/yt_dlp/extractor/viqeo.py index f0a7b5e..433fdc7 100644 --- a/yt_dlp/extractor/viqeo.py +++ b/yt_dlp/extractor/viqeo.py @@ -39,7 +39,7 @@ class ViqeoIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id) + f'https://cdn.viqeo.tv/embed/?vid={video_id}', video_id) data = self._parse_json( self._search_regex( diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index 480f49b..01e5935 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -5,7 +5,6 @@ import urllib.parse import uuid from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ -68,7 +67,7 @@ class ViuIE(ViuBaseIE): 'clip/load', video_id, 'Downloading video data', query={ 'appid': 'viu_desktop', 'fmt': 'json', - 'id': video_id + 'id': video_id, })['item'][0] title = video_data['title'] @@ -82,7 +81,7 @@ class ViuIE(ViuBaseIE): # hls_file = video_data.get('hlsfile') hls_file = video_data.get('jwhlsfile') if url_path and tdirforwhole and hls_file: - m3u8_url 
= '%s/%s/%s' % (url_path, tdirforwhole, hls_file) + m3u8_url = f'{url_path}/{tdirforwhole}/{hls_file}' else: # m3u8_url = re.sub( # r'(/hlsc_)[a-z]+(\d+\.m3u8)', @@ -96,7 +95,7 @@ class ViuIE(ViuBaseIE): continue subtitles.setdefault(mobj.group('lang'), []).append({ 'url': value, - 'ext': mobj.group('ext') + 'ext': mobj.group('ext'), }) return { @@ -132,7 +131,7 @@ class ViuPlaylistIE(ViuBaseIE): 'Downloading playlist info', query={ 'appid': 'viu_desktop', 'fmt': 'json', - 'id': 'playlist-' + playlist_id + 'id': 'playlist-' + playlist_id, })['container'] entries = [] @@ -140,7 +139,7 @@ class ViuPlaylistIE(ViuBaseIE): item_id = item.get('id') if not item_id: continue - item_id = compat_str(item_id) + item_id = str(item_id) entries.append(self.url_result( 'viu:' + item_id, 'Viu', item_id)) @@ -227,14 +226,14 @@ class ViuOTTIE(InfoExtractor): return headers = { 'Authorization': f'Bearer {self._auth_codes[country_code]}', - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', } data = self._download_json( 'https://api-gateway-global.viu.com/api/account/validate', video_id, 'Validating email address', headers=headers, data=json.dumps({ 'principal': username, - 'provider': 'email' + 'provider': 'email', }).encode()) if not data.get('exists'): raise ExtractorError('Invalid email address') @@ -264,8 +263,8 @@ class ViuOTTIE(InfoExtractor): 'platformFlagLabel': 'web', 'language': 'en', 'uuid': str(uuid.uuid4()), - 'carrierId': '0' - }).encode('utf-8'))['token'] + 'carrierId': '0', + }).encode())['token'] def _real_extract(self, url): url, idata = unsmuggle_url(url, {}) @@ -319,7 +318,7 @@ class ViuOTTIE(InfoExtractor): headers={ 'Authorization': f'Bearer {self._auth_codes[country_code]}', 'Referer': url, - 'Origin': url + 'Origin': url, }) return self._detect_error(stream_data).get('stream') @@ -365,7 +364,7 @@ class ViuOTTIE(InfoExtractor): 'url': stream_url, 'height': height, 'ext': 'mp4', - 'filesize': try_get(stream_data, lambda x: 
x['size'][vid_format], int) + 'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int), }) subtitles = {} @@ -415,12 +414,12 @@ class ViuOTTIndonesiaBaseIE(InfoExtractor): _HEADERS = { 'x-session-id': _SESSION_ID, - 'x-client': 'browser' + 'x-client': 'browser', } _AGE_RATINGS_MAPPER = { 'ADULTS': 18, - 'teens': 13 + 'teens': 13, } def _real_initialize(self): @@ -447,7 +446,7 @@ class ViuOTTIndonesiaIE(ViuOTTIndonesiaBaseIE): 'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1165863189/d-1', 'upload_date': '20210101', 'timestamp': 1609459200, - } + }, }, { 'url': 'https://www.viu.com/ott/id/id/all/video-korean-reality-tv_shows-entertainment_weekly_episode_1622-1118617054', 'info_dict': { @@ -461,8 +460,8 @@ class ViuOTTIndonesiaIE(ViuOTTIndonesiaBaseIE): 'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1120187848/d-1', 'timestamp': 1420070400, 'upload_date': '20150101', - 'cast': ['Shin Hyun-joon', 'Lee Da-Hee'] - } + 'cast': ['Shin Hyun-joon', 'Lee Da-Hee'], + }, }, { # age-limit test 'url': 'https://www.viu.com/ott/id/id/all/video-japanese-trailer-tv_shows-trailer_jujutsu_kaisen_ver_01-1166044219?containerId=playlist-26273140', @@ -477,7 +476,7 @@ class ViuOTTIndonesiaIE(ViuOTTIndonesiaBaseIE): 'description': 'Trailer \'Jujutsu Kaisen\' Ver.01', 'cast': ['Junya Enoki', ' Yûichi Nakamura', ' Yuma Uchida', 'Asami Seto'], 'age_limit': 13, - } + }, }, { # json ld metadata type equal to Movie instead of TVEpisodes 'url': 'https://www.viu.com/ott/id/id/all/video-japanese-animation-movies-demon_slayer_kimetsu_no_yaiba_the_movie_mugen_train-1165892707?containerId=1675060691786', @@ -492,7 +491,7 @@ class ViuOTTIndonesiaIE(ViuOTTIndonesiaBaseIE): 'thumbnail': 'https://vuclipi-a.akamaihd.net/p/cloudinary/h_171,w_304,dpr_1.5,f_auto,c_thumb,q_auto:low/1165895279/d-1', 'description': 'md5:1ce9c35a3aeab384085533f746c87469', 'duration': 7021, - } + }, }] 
def _real_extract(self, url): @@ -538,5 +537,5 @@ class ViuOTTIndonesiaIE(ViuOTTIndonesiaBaseIE): 'episode_number': (traverse_obj(initial_state, 'episode_no', 'episodeno', expected_type=int_or_none) or int_or_none(episode.get('episodeNumber'))), 'cast': traverse_obj(episode, ('actor', ..., 'name'), default=None), - 'age_limit': self._AGE_RATINGS_MAPPER.get(initial_state.get('internal_age_rating')) + 'age_limit': self._AGE_RATINGS_MAPPER.get(initial_state.get('internal_age_rating')), } diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 9a3c75b..6ccc701 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -140,7 +140,7 @@ class VKIE(VKBaseIE): 'comment_count': int, 'like_count': int, 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$', - } + }, }, { 'note': 'Embedded video', @@ -220,7 +220,7 @@ class VKIE(VKBaseIE): 'like_count': int, 'view_count': int, 'thumbnail': r're:https?://.+x1080$', - 'tags': list + 'tags': list, }, }, { @@ -335,7 +335,7 @@ class VKIE(VKBaseIE): mv_data = opts.get('mvData') or {} player = opts.get('player') or {} else: - video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id')) + video_id = '{}_{}'.format(mobj.group('oid'), mobj.group('id')) info_page = self._download_webpage( 'http://vk.com/video_ext.php?' 
+ mobj.group('embed_query'), video_id) @@ -530,7 +530,7 @@ class VKUserVideosIE(VKBaseIE): 'url': 'https://vk.com/video/playlist/-174476437_2', 'info_dict': { 'id': '-174476437_playlist_2', - 'title': 'Анонсы' + 'title': 'Анонсы', }, 'playlist_mincount': 108, }] @@ -580,7 +580,7 @@ class VKUserVideosIE(VKBaseIE): section = 'all' playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage)) - return self.playlist_result(self._entries(page_id, section), '%s_%s' % (page_id, section), playlist_title) + return self.playlist_result(self._entries(page_id, section), f'{page_id}_{section}', playlist_title) class VKWallPostIE(VKBaseIE): diff --git a/yt_dlp/extractor/vodplatform.py b/yt_dlp/extractor/vodplatform.py index 5ff0500..703854b 100644 --- a/yt_dlp/extractor/vodplatform.py +++ b/yt_dlp/extractor/vodplatform.py @@ -13,7 +13,7 @@ class VODPlatformIE(InfoExtractor): 'id': 'RufMcytHDolTH1MuKHY9Fw', 'ext': 'mp4', 'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"', - } + }, }, { 'url': 'http://embed.kwikmotion.com/embed/RufMcytHDolTH1MuKHY9Fw', 'only_matching': True, diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py index 9ab9768..f83c3f9 100644 --- a/yt_dlp/extractor/voicy.py +++ b/yt_dlp/extractor/voicy.py @@ -1,7 +1,6 @@ import itertools from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, smuggle_url, @@ -14,14 +13,14 @@ from ..utils import ( class VoicyBaseIE(InfoExtractor): def _extract_from_playlist_data(self, value): - voice_id = compat_str(value.get('PlaylistId')) + voice_id = str(value.get('PlaylistId')) upload_date = unified_strdate(value.get('Published'), False) items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']] return { '_type': 'multi_video', 'entries': items, 'id': voice_id, - 'title': compat_str(value.get('PlaylistName')), + 'title': str(value.get('PlaylistName')), 'uploader': value.get('SpeakerName'), 'uploader_id': 
str_or_none(value.get('SpeakerId')), 'channel': value.get('ChannelName'), @@ -45,7 +44,7 @@ class VoicyBaseIE(InfoExtractor): 'vcodec': 'none', }] return { - 'id': compat_str(entry.get('ArticleId')), + 'id': str(entry.get('ArticleId')), 'title': entry.get('ArticleTitle'), 'description': entry.get('MediaName'), 'formats': formats, @@ -54,7 +53,7 @@ class VoicyBaseIE(InfoExtractor): def _call_api(self, url, video_id, **kwargs): response = self._download_json(url, video_id, **kwargs) if response.get('Status') != 0: - message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=compat_str) + message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=str) if not message: message = 'There was a error in the response: %d' % response.get('Status') raise ExtractorError(message, expected=False) @@ -111,7 +110,7 @@ class VoicyChannelIE(VoicyBaseIE): def _entries(self, channel_id): pager = '' for count in itertools.count(1): - article_list = self._call_api(self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id, note='Paging #%d' % count) + article_list = self._call_api(self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id, note=f'Paging #{count}') playlist_data = article_list.get('PlaylistData') if not playlist_data: break @@ -124,12 +123,12 @@ class VoicyChannelIE(VoicyBaseIE): articles = self._entries(channel_id) first_article = next(articles, None) - title = traverse_obj(first_article, ('ChannelName', ), expected_type=compat_str) - speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=compat_str) + title = traverse_obj(first_article, ('ChannelName', ), expected_type=str) + speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=str) if not title and speaker_name: - title = 'Uploads from %s' % speaker_name + title = f'Uploads from {speaker_name}' if not title: - title = 'Uploads from channel ID %s' % channel_id + title = f'Uploads from channel ID {channel_id}' articles = 
itertools.chain([first_article], articles) if first_article else articles diff --git a/yt_dlp/extractor/volejtv.py b/yt_dlp/extractor/volejtv.py index 622d841..42ef9b1 100644 --- a/yt_dlp/extractor/volejtv.py +++ b/yt_dlp/extractor/volejtv.py @@ -11,7 +11,7 @@ class VolejTVIE(InfoExtractor): 'description': 'Zápas VK Královo Pole vs VK Prostějov 10.12.2022 v 19:00 na Volej.TV', 'thumbnail': 'https://volej.tv/images/og/16/17186/og.png', 'title': 'VK Královo Pole vs VK Prostějov', - } + }, }, { 'url': 'https://volej.tv/video/725605/', 'info_dict': { @@ -20,7 +20,7 @@ class VolejTVIE(InfoExtractor): 'thumbnail': 'https://volej.tv/images/og/15/17185/og.png', 'title': 'VK Lvi Praha vs VK Euro Sitex Příbram', 'description': 'Zápas VK Lvi Praha vs VK Euro Sitex Příbram 11.12.2022 v 19:00 na Volej.TV', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/voxmedia.py b/yt_dlp/extractor/voxmedia.py index f369087..e9b0047 100644 --- a/yt_dlp/extractor/voxmedia.py +++ b/yt_dlp/extractor/voxmedia.py @@ -1,6 +1,7 @@ +import urllib.parse + from .common import InfoExtractor from .once import OnceIE -from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, int_or_none, @@ -52,7 +53,7 @@ class VoxMediaVolumeIE(OnceIE): return info for provider_video_type in ('youtube', 'brightcove'): - provider_video_id = video_data.get('%s_id' % provider_video_type) + provider_video_id = video_data.get(f'{provider_video_type}_id') if not provider_video_id: continue if provider_video_type == 'brightcove': @@ -60,7 +61,7 @@ class VoxMediaVolumeIE(OnceIE): else: info.update({ '_type': 'url_transparent', - 'url': provider_video_id if provider_video_type == 'youtube' else '%s:%s' % (provider_video_type, provider_video_id), + 'url': provider_video_id if provider_video_type == 'youtube' else f'{provider_video_type}:{provider_video_id}', 'ie_key': provider_video_type.capitalize(), }) return info @@ -172,7 +173,7 @@ class VoxMediaIE(InfoExtractor): def 
_real_extract(self, url): display_id = self._match_id(url) - webpage = compat_urllib_parse_unquote(self._download_webpage(url, display_id)) + webpage = urllib.parse.unquote(self._download_webpage(url, display_id)) def create_entry(provider_video_id, provider_video_type, title=None, description=None): video_url = { diff --git a/yt_dlp/extractor/vrt.py b/yt_dlp/extractor/vrt.py index 3d26549..33ff574 100644 --- a/yt_dlp/extractor/vrt.py +++ b/yt_dlp/extractor/vrt.py @@ -38,12 +38,12 @@ class VRTBaseIE(GigyaBaseIE): 'device': 'undefined (undefined)', 'os': { 'name': 'Windows', - 'version': 'x86_64' + 'version': 'x86_64', }, 'player': { 'name': 'VRT web player', - 'version': '2.7.4-prod-2023-04-19T06:05:45' - } + 'version': '2.7.4-prod-2023-04-19T06:05:45', + }, } # From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js _JWT_KEY_ID = '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w=' @@ -98,8 +98,8 @@ class VRTBaseIE(GigyaBaseIE): }, data=json.dumps({ 'identityToken': id_token or {}, 'playerInfo': jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={ - 'kid': self._JWT_KEY_ID - }).decode() + 'kid': self._JWT_KEY_ID, + }).decode(), }, separators=(',', ':')).encode())['vrtPlayerToken'] return self._download_json( @@ -365,7 +365,7 @@ class KetnetIE(VRTBaseIE): subtitleVideodetail titleVideodetail } -}''' % display_id, +}''' % display_id, # noqa: UP031 })['data']['video'] video_id = urllib.parse.unquote(video['mediaReference']) @@ -437,9 +437,9 @@ class Radio1BeIE(VRTBaseIE): 'title': 'Komt N-VA volgend jaar op in Wallonië?', 'display_id': 'de-ochtend/komt-n-va-volgend-jaar-op-in-wallonie', 'description': 'md5:b374ea1c9302f38362df9dea1931468e', - 'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+' + 'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+', }, - 'playlist_mincount': 1 + 'playlist_mincount': 1, }, { 'url': 
'https://radio1.be/lees/europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza?view=web', 'info_dict': { @@ -447,9 +447,9 @@ class Radio1BeIE(VRTBaseIE): 'title': 'Europese Unie wil "onmiddellijke humanitaire pauze" en "duurzaam staakt-het-vuren" in Gaza', 'description': 'md5:1aad1fae7d39edeffde5d3e67d276b64', 'thumbnail': r're:https?://cds\.vrt\.radio/[^/#\?&]+', - 'display_id': 'europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza' + 'display_id': 'europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza', }, - 'playlist_mincount': 1 + 'playlist_mincount': 1, }] def _extract_video_entries(self, next_js_data, display_id): @@ -466,7 +466,7 @@ class Radio1BeIE(VRTBaseIE): 'subtitles': subtitles, **traverse_obj(data, { 'title': ('title', {str}), - 'description': ('body', {clean_html}) + 'description': ('body', {clean_html}), }), } diff --git a/yt_dlp/extractor/vtm.py b/yt_dlp/extractor/vtm.py index 6db49c5..41b41ec 100644 --- a/yt_dlp/extractor/vtm.py +++ b/yt_dlp/extractor/vtm.py @@ -21,7 +21,7 @@ class VTMIE(InfoExtractor): 'duration': 74, # TODO: fix url _type result processing # 'series': 'Op Interventie', - } + }, } def _real_extract(self, url): @@ -42,7 +42,7 @@ class VTMIE(InfoExtractor): title } } -}''' % uuid, +}''' % uuid, # noqa: UP031 }, headers={ 'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e', })['data']['getComponent'] diff --git a/yt_dlp/extractor/vuclip.py b/yt_dlp/extractor/vuclip.py index 0e56298..ad7eab3 100644 --- a/yt_dlp/extractor/vuclip.py +++ b/yt_dlp/extractor/vuclip.py @@ -1,9 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, -) from ..utils import ( ExtractorError, parse_duration, @@ -21,7 +19,7 @@ class VuClipIE(InfoExtractor): 'ext': '3gp', 'title': 'Top 10 TV Convicts', 'duration': 733, - } + }, } def _real_extract(self, url): @@ -31,7 +29,7 @@ class 
VuClipIE(InfoExtractor): ad_m = re.search( r'''value="No.*?" onClick="location.href='([^"']+)'"''', webpage) if ad_m: - urlr = compat_urllib_parse_urlparse(url) + urlr = urllib.parse.urlparse(url) adfree_url = urlr.scheme + '://' + urlr.netloc + ad_m.group(1) webpage = self._download_webpage( adfree_url, video_id, note='Download post-ad page') @@ -41,7 +39,7 @@ class VuClipIE(InfoExtractor): default=None) if error_msg: raise ExtractorError( - '%s said: %s' % (self.IE_NAME, error_msg), expected=True) + f'{self.IE_NAME} said: {error_msg}', expected=True) # These clowns alternate between two page types video_url = self._search_regex( diff --git a/yt_dlp/extractor/vvvvid.py b/yt_dlp/extractor/vvvvid.py index b961123..4671bcd 100644 --- a/yt_dlp/extractor/vvvvid.py +++ b/yt_dlp/extractor/vvvvid.py @@ -12,7 +12,7 @@ from ..utils import ( class VVVVIDIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/' - _VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE + _VALID_URL = rf'{_VALID_URL_BASE}(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' _TESTS = [{ # video_type == 'video/vvvvid' 'url': 'https://www.vvvvid.it/show/498/the-power-of-computing/518/505692/playstation-vr-cambiera-il-nostro-modo-di-giocare', @@ -109,7 +109,7 @@ class VVVVIDIE(InfoExtractor): }, }, { 'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048', - 'only_matching': True + 'only_matching': True, }] _conn_id = None @@ -132,12 +132,12 @@ class VVVVIDIE(InfoExtractor): if query: q.update(query) response = self._download_json( - 'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path), + f'https://www.vvvvid.it/vvvvid/ondemand/{show_id}/{path}', video_id, headers=self._headers, query=q, fatal=fatal) if not (response or fatal): return if response.get('result') == 'error': - raise ExtractorError('%s said: %s' % ( + raise ExtractorError('{} said: {}'.format( 
self.IE_NAME, response['message']), expected=True) return response['data'] @@ -151,18 +151,18 @@ class VVVVIDIE(InfoExtractor): show_id, season_id, video_id = self._match_valid_url(url).groups() response = self._download_info( - show_id, 'season/%s' % season_id, + show_id, f'season/{season_id}', video_id, query={'video_id': video_id}) vid = int(video_id) - video_data = list(filter( - lambda episode: episode.get('video_id') == vid, response))[0] + video_data = next(filter( + lambda episode: episode.get('video_id') == vid, response)) title = video_data['title'] formats = [] # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js def ds(h): - g = "MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij" + g = 'MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij' def f(m): l = [] @@ -260,7 +260,7 @@ class VVVVIDIE(InfoExtractor): embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False)) else: formats.extend(self._extract_wowza_formats( - 'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id, skip_protocols=['f4m'])) + f'http://sb.top-ix.org/videomg/_definst_/mp4:{embed_code}/playlist.m3u8', video_id, skip_protocols=['f4m'])) metadata_from_url(embed_code) if not is_youtube: @@ -283,7 +283,7 @@ class VVVVIDIE(InfoExtractor): class VVVVIDShowIE(VVVVIDIE): # XXX: Do not subclass from concrete IE - _VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE + _VALID_URL = rf'(?P<base_url>{VVVVIDIE._VALID_URL_BASE}(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' _TESTS = [{ 'url': 'https://www.vvvvid.it/show/156/psyco-pass', 'info_dict': { @@ -307,7 +307,7 @@ class VVVVIDShowIE(VVVVIDIE): # XXX: Do not subclass from concrete IE show_id, 'info/', show_title, fatal=False) if not show_title: - base_url += "/title" + base_url += '/title' entries = [] for season in (seasons or []): diff --git a/yt_dlp/extractor/walla.py 
b/yt_dlp/extractor/walla.py index 3ac0f83..442a9bc 100644 --- a/yt_dlp/extractor/walla.py +++ b/yt_dlp/extractor/walla.py @@ -23,7 +23,7 @@ class WallaIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, } _SUBTITLE_LANGS = { @@ -36,7 +36,7 @@ class WallaIE(InfoExtractor): display_id = mobj.group('display_id') video = self._download_xml( - 'http://video2.walla.co.il/?w=null/null/%s/@@/video/flv_pl' % video_id, + f'http://video2.walla.co.il/?w=null/null/{video_id}/@@/video/flv_pl', display_id) item = video.find('./items/item') diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py index 1cfed2d..36e1f30 100644 --- a/yt_dlp/extractor/washingtonpost.py +++ b/yt_dlp/extractor/washingtonpost.py @@ -82,7 +82,7 @@ class WashingtonPostArticleIE(InfoExtractor): 'upload_date': '20141230', 'timestamp': 1419972442, 'title': 'Why black boxes don’t transmit data in real time', - } + }, }], 'skip': 'Doesnt have a video anymore', }, { @@ -92,7 +92,7 @@ class WashingtonPostArticleIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url) + return False if WashingtonPostIE.suitable(url) else super().suitable(url) def _real_extract(self, url): page_id = self._match_id(url) @@ -112,7 +112,7 @@ class WashingtonPostArticleIE(InfoExtractor): if content_element.get('type') == 'video': uuids.append(content_element.get('_id')) - entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids] + entries = [self.url_result(f'washingtonpost:{uuid}', 'WashingtonPost', uuid) for uuid in uuids] return { '_type': 'playlist', diff --git a/yt_dlp/extractor/wat.py b/yt_dlp/extractor/wat.py index 9ea3fdd..03bac66 100644 --- a/yt_dlp/extractor/wat.py +++ b/yt_dlp/extractor/wat.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ 
-52,13 +51,13 @@ class WatIE(InfoExtractor): 'ext': 'mp4', }, 'params': {'skip_download': 'm3u8'}, - } + }, ] _GEO_BYPASS = False def _real_extract(self, url): video_id = self._match_id(url) - video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36)) + video_id = video_id if video_id.isdigit() and len(video_id) > 6 else str(int(video_id, 36)) # 'contentv4' is used in the website, but it also returns the related # videos, we don't need them diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index 0b7ddd2..1c1f0ed 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) from ..utils import ( ExtractorError, determine_ext, @@ -39,7 +36,7 @@ class WDRIE(InfoExtractor): def _asset_url(self, wdr_id): id_len = max(len(wdr_id), 5) - return ''.join(('https:', self.__API_URL_TPL % (wdr_id[:id_len - 4], wdr_id, ), '.js')) + return ''.join(('https:', self.__API_URL_TPL % (wdr_id[:id_len - 4], wdr_id), '.js')) def _real_extract(self, url): video_id = self._match_id(url) @@ -94,7 +91,7 @@ class WDRIE(InfoExtractor): medium_url, 'stream', fatal=False)) else: a_format = { - 'url': medium_url + 'url': medium_url, } if ext == 'unknown_video': urlh = self._request_webpage( @@ -168,7 +165,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE 'upload_date': '20160312', 'description': 'md5:e127d320bc2b1f149be697ce044a3dd7', 'is_live': False, - 'subtitles': {} + 'subtitles': {}, }, 'skip': 'HTTP Error 404: Not Found', }, @@ -202,7 +199,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE 'upload_date': 're:^[0-9]{8}$', 'title': 're:^Die Sendung (?:mit der Maus )?vom [0-9.]{10}$', }, - 'skip': 'The id changes from week to week because of the new episode' + 'skip': 'The id changes from week to week because of the new episode', }, { 'url': 
'http://www.wdrmaus.de/filme/sachgeschichten/achterbahn.php5', @@ -228,7 +225,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE }, 'params': { 'skip_download': True, # m3u8 download - } + }, }, { 'url': 'http://www.sportschau.de/handballem2018/handball-nationalmannschaft-em-stolperstein-vorrunde-100.html', @@ -260,7 +257,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE 'alt_title': 'Rockpalast', 'upload_date': '20220725', }, - } + }, ] def _real_extract(self, url): @@ -289,14 +286,14 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE if not media_link_obj: continue jsonp_url = try_get( - media_link_obj, lambda x: x['mediaObj']['url'], compat_str) + media_link_obj, lambda x: x['mediaObj']['url'], str) if jsonp_url: # metadata, or player JS with ['ref'] giving WDR id, or just media, perhaps clip_id = media_link_obj['mediaObj'].get('ref') if jsonp_url.endswith('.assetjsonp'): asset = self._download_json( jsonp_url, display_id, fatal=False, transform_source=strip_jsonp) - clip_id = try_get(asset, lambda x: x['trackerData']['trackerClipId'], compat_str) + clip_id = try_get(asset, lambda x: x['trackerData']['trackerClipId'], str) if clip_id: jsonp_url = self._asset_url(clip_id[4:]) entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key())) @@ -305,7 +302,7 @@ class WDRPageIE(WDRIE): # XXX: Do not subclass from concrete IE if not entries: entries = [ self.url_result( - compat_urlparse.urljoin(url, mobj.group('href')), + urllib.parse.urljoin(url, mobj.group('href')), ie=WDRPageIE.ie_key()) for mobj in re.finditer( r'<a[^>]+\bhref=(["\'])(?P<href>(?:(?!\1).)+)\1[^>]+\bdata-extension(?:-ard)?=', @@ -324,7 +321,7 @@ class WDRElefantIE(InfoExtractor): 'title': 'Wippe', 'id': 'mdb-1198320', 'ext': 'mp4', - 'upload_date': '20071003' + 'upload_date': '20071003', }, } @@ -348,7 +345,7 @@ class WDRElefantIE(InfoExtractor): zmdb_url_element = xml_metadata.find('./movie/zmdb_url') if zmdb_url_element is None: raise 
ExtractorError( - '%s is not a video' % display_id, expected=True) + f'{display_id} is not a video', expected=True) return self.url_result(zmdb_url_element.text, ie=WDRIE.ie_key()) @@ -368,7 +365,7 @@ class WDRMobileIE(InfoExtractor): 'ext': 'mp4', 'age_limit': 0, }, - 'skip': 'Problems with loading data.' + 'skip': 'Problems with loading data.', } def _real_extract(self, url): diff --git a/yt_dlp/extractor/webcamerapl.py b/yt_dlp/extractor/webcamerapl.py index a02d951..e0ee17c 100644 --- a/yt_dlp/extractor/webcamerapl.py +++ b/yt_dlp/extractor/webcamerapl.py @@ -12,7 +12,7 @@ class WebcameraplIE(InfoExtractor): 'ext': 'mp4', 'title': r're:WIDOK NA PLAC ZAMKOWY W WARSZAWIE \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'live_status': 'is_live', - } + }, }, { 'url': 'https://gdansk-stare-miasto.webcamera.pl/', 'info_dict': { @@ -20,7 +20,7 @@ class WebcameraplIE(InfoExtractor): 'ext': 'mp4', 'title': r're:GDAŃSK - widok na Stare Miasto \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'live_status': 'is_live', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/webcaster.py b/yt_dlp/extractor/webcaster.py index 43eeca0..b0865e3 100644 --- a/yt_dlp/extractor/webcaster.py +++ b/yt_dlp/extractor/webcaster.py @@ -35,7 +35,7 @@ class WebcasterIE(InfoExtractor): formats = [] for format_id in (None, 'noise'): track_tag = join_nonempty('track', format_id, delim='_') - for track in video.findall('.//iphone/%s' % track_tag): + for track in video.findall(f'.//iphone/{track_tag}'): track_url = track.text if not track_url: continue diff --git a/yt_dlp/extractor/webofstories.py b/yt_dlp/extractor/webofstories.py index 65f48f3..24befe7 100644 --- a/yt_dlp/extractor/webofstories.py +++ b/yt_dlp/extractor/webofstories.py @@ -22,7 +22,7 @@ class WebOfStoriesIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'Hans Bethe talks about calculating the temperature of the sun', 'duration': 238, - } + }, }, { 'url': 'http://www.webofstories.com/play/55908', 'md5': 
'2985a698e1fe3211022422c4b5ed962c', @@ -79,19 +79,15 @@ class WebOfStoriesIE(InfoExtractor): ms_prefix = 'mini_sites/' if is_great_life_series: - mp4_url = '{0:}lives/{1:}/{2:}.mp4'.format( - self._VIDEO_DOMAIN, speaker_id, story_filename) + mp4_url = f'{self._VIDEO_DOMAIN}lives/{speaker_id}/{story_filename}.mp4' rtmp_ext = 'flv' streamer = self._GREAT_LIFE_STREAMER - play_path = 'stories/{0:}/{1:}'.format( - speaker_id, story_filename) + play_path = f'stories/{speaker_id}/{story_filename}' else: - mp4_url = '{0:}{1:}{2:}/{3:}.mp4'.format( - self._VIDEO_DOMAIN, ms_prefix, speaker_id, story_filename) + mp4_url = f'{self._VIDEO_DOMAIN}{ms_prefix}{speaker_id}/{story_filename}.mp4' rtmp_ext = 'mp4' streamer = self._USER_STREAMER - play_path = 'mp4:{0:}{1:}/{2}.mp4'.format( - ms_prefix, speaker_id, story_filename) + play_path = f'mp4:{ms_prefix}{speaker_id}/{story_filename}.mp4' formats = [{ 'format_id': 'mp4_sd', @@ -132,7 +128,7 @@ class WebOfStoriesPlaylistIE(InfoExtractor): entries = [ self.url_result( - 'http://www.webofstories.com/play/%s' % video_id, + f'http://www.webofstories.com/play/{video_id}', 'WebOfStories', video_id=video_id) for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage)) ] @@ -145,7 +141,7 @@ class WebOfStoriesPlaylistIE(InfoExtractor): r'<span id="primaryField">([^<]+)</span>', webpage, 'field', default=None) if field: - title += ' (%s)' % field + title += f' ({field})' if not title: title = self._search_regex( diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index b6a6593..b5c0e92 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -34,7 +34,7 @@ class WeiboBaseIE(InfoExtractor): 'browser': f'Chrome{chrome_ver},0,0,0', 'fonts': 'undefined', 'screenInfo': '1920*1080*24', - 'plugins': '' + 'plugins': '', }, separators=(',', ':'))}))['data'] self._download_webpage( @@ -52,6 +52,7 @@ class WeiboBaseIE(InfoExtractor): }) def _weibo_download_json(self, url, video_id, *args, fatal=True, 
note='Downloading JSON metadata', **kwargs): + # XXX: Always fatal; _download_webpage_handle only returns False (not a tuple) on error webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs) if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com': self._update_visitor_cookies(urlh.url, video_id) @@ -90,7 +91,7 @@ class WeiboBaseIE(InfoExtractor): 'video_details', lambda _, v: v['label'].startswith(format_id), { 'size': ('size', {int_or_none}), 'tbr': ('bitrate', {int_or_none}), - } + }, ), get_all=False), }) return formats @@ -162,7 +163,7 @@ class WeiboIE(WeiboBaseIE): 'view_count': int, 'like_count': int, 'repost_count': int, - } + }, }, { 'url': 'https://weibo.com/0/4224132150961381', 'note': 'no playback_list example', @@ -185,7 +186,7 @@ class WeiboVideoIE(WeiboBaseIE): 'ext': 'mp4', 'display_id': 'LEZDodaiW', 'title': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了', - 'description': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了 http://t.cn/A6aerGsM ', + 'description': '呃,稍微了解了一下靡烟miya,感觉这东西也太二了 http://t.cn/A6aerGsM \u200b\u200b\u200b', 'duration': 76, 'timestamp': 1659344278, 'upload_date': '20220801', @@ -196,7 +197,7 @@ class WeiboVideoIE(WeiboBaseIE): 'view_count': int, 'like_count': int, 'repost_count': int, - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/wevidi.py b/yt_dlp/extractor/wevidi.py index 3b6d032..0db52af 100644 --- a/yt_dlp/extractor/wevidi.py +++ b/yt_dlp/extractor/wevidi.py @@ -15,7 +15,7 @@ class WeVidiIE(InfoExtractor): 'description': 'md5:73a27d0a87d49fbcc5584566326ebeed', 'uploader': 'eclecRC', 'duration': 932.098, - } + }, }, { 'url': 'https://wevidi.net/watch/ievRuuQHbPS', 'md5': 'ce8a94989a959bff9003fa27ee572935', @@ -27,7 +27,7 @@ class WeVidiIE(InfoExtractor): 'description': 'md5:32cdfca272687390d9bd9b0c9c6153ee', 'uploader': 'WeVidi', 'duration': 36.1999, - } + }, }, { 'url': 'https://wevidi.net/watch/PcMzDWaQSWb', 'md5': '55ee0d3434be5d9e5cc76b83f2bb57ec', @@ -39,7 +39,7 @@ class 
WeVidiIE(InfoExtractor): 'description': 'md5:e2c9e2b54b8bb424cc64937c8fdc068f', 'uploader': 'WeVidi', 'duration': 41.972, - } + }, }, { 'url': 'https://wevidi.net/watch/wJnRqDHNe_u', 'md5': 'c8f263dd47e66cc17546b3abf47b5a77', @@ -51,7 +51,7 @@ class WeVidiIE(InfoExtractor): 'description': 'md5:e65036f0d4af80e0af191bd11af5195e', 'uploader': 'GissyEva', 'duration': 630.451, - } + }, }, { 'url': 'https://wevidi.net/watch/4m1c4yJR_yc', 'md5': 'c63ce5ca6990dce86855fc02ca5bc1ed', @@ -63,7 +63,7 @@ class WeVidiIE(InfoExtractor): 'description': 'md5:96af99dd63468b2dfab3020560e3e9b2', 'uploader': 'eclecRC', 'duration': 6.804, - } + }, }] def _extract_formats(self, wvplayer_props): @@ -74,7 +74,7 @@ class WeVidiIE(InfoExtractor): 3: 360, 4: 480, 5: 720, - 6: 1080 + 6: 1080, } src_path = f'{wvplayer_props["srcVID"]}/{wvplayer_props["srcUID"]}/{wvplayer_props["srcNAME"]}' diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py index 492891d..9ca5c3b 100644 --- a/yt_dlp/extractor/whowatch.py +++ b/yt_dlp/extractor/whowatch.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ -21,8 +20,8 @@ class WhoWatchIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) self._download_webpage(url, video_id) - metadata = self._download_json('https://api.whowatch.tv/lives/%s' % video_id, video_id) - live_data = self._download_json('https://api.whowatch.tv/lives/%s/play' % video_id, video_id) + metadata = self._download_json(f'https://api.whowatch.tv/lives/{video_id}', video_id) + live_data = self._download_json(f'https://api.whowatch.tv/lives/{video_id}/play', video_id) title = try_call( lambda: live_data['share_info']['live_title'][1:-1], @@ -37,7 +36,7 @@ class WhoWatchIE(InfoExtractor): formats = [] for i, fmt in enumerate(live_data.get('streams') or []): - name = fmt.get('quality') or fmt.get('name') or compat_str(i) + name = fmt.get('quality') or 
fmt.get('name') or str(i) hls_url = fmt.get('hls_url') rtmp_url = fmt.get('rtmp_url') audio_only = fmt.get('audio_only') @@ -45,7 +44,7 @@ class WhoWatchIE(InfoExtractor): if hls_url: hls_fmts = self._extract_m3u8_formats( - hls_url, video_id, ext='mp4', m3u8_id='hls-%s' % name, quality=quality) + hls_url, video_id, ext='mp4', m3u8_id=f'hls-{name}', quality=quality) formats.extend(hls_fmts) else: hls_fmts = [] @@ -54,7 +53,7 @@ class WhoWatchIE(InfoExtractor): if rtmp_url and not audio_only: formats.append({ 'url': rtmp_url, - 'format_id': 'rtmp-%s' % name, + 'format_id': f'rtmp-{name}', 'ext': 'mp4', 'protocol': 'rtmp_ffmpeg', # ffmpeg can, while rtmpdump can't 'vcodec': 'h264', @@ -71,12 +70,12 @@ class WhoWatchIE(InfoExtractor): hls_url, video_id, ext='mp4', m3u8_id='hls')) self._remove_duplicate_formats(formats) - uploader_url = try_get(metadata, lambda x: x['live']['user']['user_path'], compat_str) + uploader_url = try_get(metadata, lambda x: x['live']['user']['user_path'], str) if uploader_url: - uploader_url = 'https://whowatch.tv/profile/%s' % uploader_url - uploader_id = compat_str(try_get(metadata, lambda x: x['live']['user']['id'], int)) - uploader = try_get(metadata, lambda x: x['live']['user']['name'], compat_str) - thumbnail = try_get(metadata, lambda x: x['live']['latest_thumbnail_url'], compat_str) + uploader_url = f'https://whowatch.tv/profile/{uploader_url}' + uploader_id = str(try_get(metadata, lambda x: x['live']['user']['id'], int)) + uploader = try_get(metadata, lambda x: x['live']['user']['name'], str) + thumbnail = try_get(metadata, lambda x: x['live']['latest_thumbnail_url'], str) timestamp = int_or_none(try_get(metadata, lambda x: x['live']['started_at'], int), scale=1000) view_count = try_get(metadata, lambda x: x['live']['total_view_count'], int) comment_count = try_get(metadata, lambda x: x['live']['comment_count'], int) diff --git a/yt_dlp/extractor/wikimedia.py b/yt_dlp/extractor/wikimedia.py index 11c801f..6326930 100644 --- 
a/yt_dlp/extractor/wikimedia.py +++ b/yt_dlp/extractor/wikimedia.py @@ -24,8 +24,8 @@ class WikimediaIE(InfoExtractor): 'description': 'md5:7cd84f76e7081f1be033d0b155b4a460', 'license': 'Creative Commons Attribution 4.0 International', 'uploader': 'ZDF/Terra X/Gruppe 5/Luise Wagner, Jonas Sichert, Andreas Hougardy', - 'subtitles': 'count:4' - } + 'subtitles': 'count:4', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py index d7d77c0..000d143 100644 --- a/yt_dlp/extractor/wimtv.py +++ b/yt_dlp/extractor/wimtv.py @@ -10,14 +10,14 @@ from ..utils import ( class WimTVIE(InfoExtractor): _player = None _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' - _VALID_URL = r'''(?x: + _VALID_URL = rf'''(?x: https?://platform\.wim\.tv/ (?: (?:embed/)?\? |\#/webtv/.+?/ ) (?P<type>vod|live|cast)[=/] - (?P<id>%s).*?)''' % _UUID_RE + (?P<id>{_UUID_RE}).*?)''' _EMBED_REGEX = [rf'<iframe[^>]+src=["\'](?P<url>{_VALID_URL})'] _TESTS = [{ # vod stream @@ -28,7 +28,7 @@ class WimTVIE(InfoExtractor): 'ext': 'mp4', 'title': 'AMA SUPERCROSS 2020 - R2 ST. 
LOUIS', 'duration': 6481, - 'thumbnail': r're:https?://.+?/thumbnail/.+?/720$' + 'thumbnail': r're:https?://.+?/thumbnail/.+?/720$', }, 'params': { 'skip_download': True, @@ -66,7 +66,7 @@ class WimTVIE(InfoExtractor): 'vars': [{ 'regex': r'appAuth = "(.+?)"', 'variable': 'app_auth', - }] + }], }, { 'url': 'https://platform.wim.tv/common/config/endpointconfig.js', 'vars': [{ @@ -75,7 +75,7 @@ class WimTVIE(InfoExtractor): }, { 'regex': r'PRODUCTION_HOSTNAME_THUMB\s*\+\s*"(.+?)"', 'variable': 'thumb_server_path', - }] + }], }] for data in datas: @@ -83,13 +83,13 @@ class WimTVIE(InfoExtractor): for var in data['vars']: val = self._search_regex(var['regex'], temp, msg_id) if not val: - raise ExtractorError('%s not found' % var['variable']) + raise ExtractorError('{} not found'.format(var['variable'])) self._player[var['variable']] = val def _generate_token(self): json = self._download_json( 'https://platform.wim.tv/wimtv-server/oauth/token', 'Token generation', - headers={'Authorization': 'Basic %s' % self._player['app_auth']}, + headers={'Authorization': 'Basic {}'.format(self._player['app_auth'])}, data=urlencode_postdata({'grant_type': 'client_credentials'})) token = json.get('access_token') if not token: @@ -101,7 +101,7 @@ class WimTVIE(InfoExtractor): return None if not self._player.get('thumb_server_path'): self._player['thumb_server_path'] = '' - return '%s%s/asset/thumbnail/%s/%s' % ( + return '{}{}/asset/thumbnail/{}/{}'.format( self._player['thumb_server'], self._player['thumb_server_path'], thumb_id, width) @@ -118,11 +118,11 @@ class WimTVIE(InfoExtractor): is_live = False token = self._generate_token() json = self._download_json( - 'https://platform.wim.tv/wimtv-server/api/public/%s/%s/play' % ( - stream_type, video_id), video_id, - headers={'Authorization': 'Bearer %s' % token, - 'Content-Type': 'application/json'}, - data=bytes('{}', 'utf-8')) + f'https://platform.wim.tv/wimtv-server/api/public/{stream_type}/{video_id}/play', + video_id, headers={ + 
'Authorization': f'Bearer {token}', + 'Content-Type': 'application/json', + }, data=b'{}') formats = [] for src in json.get('srcs') or []: diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py index f2256fd..fb2a864 100644 --- a/yt_dlp/extractor/wistia.py +++ b/yt_dlp/extractor/wistia.py @@ -24,7 +24,7 @@ class WistiaBaseIE(InfoExtractor): _EMBED_BASE_URL = 'http://fast.wistia.net/embed/' def _download_embed_config(self, config_type, config_id, referer): - base_url = self._EMBED_BASE_URL + '%s/%s' % (config_type, config_id) + base_url = self._EMBED_BASE_URL + f'{config_type}/{config_id}' embed_config = self._download_json( base_url + '.json', config_id, headers={ 'Referer': referer if referer.startswith('http') else base_url, # Some videos require this. @@ -74,7 +74,7 @@ class WistiaBaseIE(InfoExtractor): display_name = a.get('display_name') format_id = atype if atype and atype.endswith('_video') and display_name: - format_id = '%s-%s' % (atype[:-6], display_name) + format_id = f'{atype[:-6]}-{display_name}' f = { 'format_id': format_id, 'url': aurl, @@ -157,7 +157,7 @@ class WistiaBaseIE(InfoExtractor): class WistiaIE(WistiaBaseIE): - _VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX) + _VALID_URL = rf'(?:wistia:|{WistiaBaseIE._VALID_URL_BASE}(?:iframe|medias)/){WistiaBaseIE._VALID_ID_REGEX}' _EMBED_REGEX = [ r'''(?x) <(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'] @@ -189,7 +189,7 @@ class WistiaIE(WistiaBaseIE): 'duration': 966.0, 'timestamp': 1616614369, 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/53dc60239348dc9b9fba3755173ea4c2.png', - } + }, }, { 'url': 'wistia:5vd7p4bct5', 'md5': 'b9676d24bf30945d97060638fbfe77f0', @@ -228,7 +228,7 @@ class WistiaIE(WistiaBaseIE): 'description': 'md5:27abc99a758573560be72600ef95cece', 'upload_date': '20210421', 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/6c551820ae950cdee2306d6cbe9ef742.jpg', - } + }, }, { 'url': 
'https://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', 'md5': 'b9676d24bf30945d97060638fbfe77f0', @@ -254,19 +254,19 @@ class WistiaIE(WistiaBaseIE): urls = list(super()._extract_embed_urls(url, webpage)) for match in cls._extract_wistia_async_embed(webpage): if match.group('type') != 'wistia_channel': - urls.append('wistia:%s' % match.group('id')) + urls.append('wistia:{}'.format(match.group('id'))) for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage): - urls.append('wistia:%s' % match.group('id')) + urls.append('wistia:{}'.format(match.group('id'))) if not WistiaChannelIE._extract_embed_urls(url, webpage): # Fallback media_id = cls._extract_url_media_id(url) if media_id: - urls.append('wistia:%s' % match.group('id')) + urls.append('wistia:{}'.format(match.group('id'))) return urls class WistiaPlaylistIE(WistiaBaseIE): - _VALID_URL = r'%splaylists/%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX) + _VALID_URL = rf'{WistiaBaseIE._VALID_URL_BASE}playlists/{WistiaBaseIE._VALID_ID_REGEX}' _TEST = { 'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc', @@ -291,7 +291,7 @@ class WistiaPlaylistIE(WistiaBaseIE): class WistiaChannelIE(WistiaBaseIE): - _VALID_URL = r'(?:wistiachannel:|%schannel/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX) + _VALID_URL = rf'(?:wistiachannel:|{WistiaBaseIE._VALID_URL_BASE}channel/){WistiaBaseIE._VALID_ID_REGEX}' _TESTS = [{ # JSON Embed API returns 403, should fall back to webpage @@ -299,7 +299,7 @@ class WistiaChannelIE(WistiaBaseIE): 'info_dict': { 'id': 'yvyvu7wjbg', 'title': 'Copysmith Tutorials and Education!', - 'description': 'Learn all things Copysmith via short and informative videos!' 
+ 'description': 'Learn all things Copysmith via short and informative videos!', }, 'playlist_mincount': 7, 'expected_warnings': ['falling back to webpage'], @@ -370,7 +370,7 @@ class WistiaChannelIE(WistiaBaseIE): self.report_warning('Failed to download channel data from API, falling back to webpage.') webpage = self._download_webpage(f'https://fast.wistia.net/embed/channel/{channel_id}', channel_id) data = self._parse_json( - self._search_regex(r'wchanneljsonp-%s\'\]\s*=[^\"]*\"([A-Za-z0-9=/]*)' % channel_id, webpage, 'jsonp', channel_id), + self._search_regex(rf'wchanneljsonp-{channel_id}\'\]\s*=[^\"]*\"([A-Za-z0-9=/]*)', webpage, 'jsonp', channel_id), channel_id, transform_source=lambda x: urllib.parse.unquote_plus(base64.b64decode(x).decode('utf-8'))) # XXX: can there be more than one series? diff --git a/yt_dlp/extractor/wordpress.py b/yt_dlp/extractor/wordpress.py index 378d99d..a0a3194 100644 --- a/yt_dlp/extractor/wordpress.py +++ b/yt_dlp/extractor/wordpress.py @@ -44,10 +44,10 @@ class WordpressPlaylistEmbedIE(InfoExtractor): 'duration': 49.0, 'artist': 'Nancy and Randall Faber', 'description': 'md5:a9f8e9aeabbd2912bc13cc0fab1a4ce8', - } + }, }], 'playlist_count': 6, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }] def _extract_from_webpage(self, url, webpage): @@ -94,11 +94,11 @@ class WordpressMiniAudioPlayerEmbedIE(InfoExtractor): 'id': 'over_the_horizon_2013', 'ext': 'mp3', 'title': 'Over the Horizon 2013', - 'url': 'http://news.samsung.com/global/wp-content/uploads/ringtones/over_the_horizon_2013.mp3' - } + 'url': 'http://news.samsung.com/global/wp-content/uploads/ringtones/over_the_horizon_2013.mp3', + }, }], 'playlist_count': 6, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { # Version 1.9.3: https://plugins.trac.wordpress.org/browser/wp-miniaudioplayer/tags/1.9.3 'url': 'https://www.booksontape.com/collections/audiobooks-with-teacher-guides/', @@ -108,7 +108,7 @@ class 
WordpressMiniAudioPlayerEmbedIE(InfoExtractor): 'age_limit': 0, 'thumbnail': 'https://www.booksontape.com/wp-content/uploads/2016/09/bot-logo-1200x630.jpg', }, - 'playlist_mincount': 12 + 'playlist_mincount': 12, }, { # Version 1.9.7: https://plugins.trac.wordpress.org/browser/wp-miniaudioplayer/tags/1.9.7 # But has spaces around href filter @@ -122,7 +122,7 @@ class WordpressMiniAudioPlayerEmbedIE(InfoExtractor): 'thumbnail': 'https://www.estudiords.com.br/wp-content/uploads/2021/03/LOGO-TEMAS.png', 'description': 'md5:ab24d6a7ed0312ad2d466e721679f5a0', }, - 'playlist_mincount': 30 + 'playlist_mincount': 30, }] def _extract_from_webpage(self, url, webpage): diff --git a/yt_dlp/extractor/worldstarhiphop.py b/yt_dlp/extractor/worldstarhiphop.py index c6948a1..3af4bd4 100644 --- a/yt_dlp/extractor/worldstarhiphop.py +++ b/yt_dlp/extractor/worldstarhiphop.py @@ -9,8 +9,8 @@ class WorldStarHipHopIE(InfoExtractor): 'info_dict': { 'id': 'wshh6a7q1ny0G34ZwuIO', 'ext': 'mp4', - 'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!' 
- } + 'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!', + }, }, { 'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO', 'only_matching': True, diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py index 0ef4e8e..b4cc1ab 100644 --- a/yt_dlp/extractor/wppilot.py +++ b/yt_dlp/extractor/wppilot.py @@ -103,7 +103,7 @@ class WPPilotIE(WPPilotBaseIE): is_authorized = next((c for c in self.cookiejar if c.name == 'netviapisessid'), None) # cookies starting with "g:" are assigned to guests - is_authorized = True if is_authorized is not None and not is_authorized.value.startswith('g:') else False + is_authorized = is_authorized is not None and not is_authorized.value.startswith('g:') video = self._download_json( (self._VIDEO_URL if is_authorized else self._VIDEO_GUEST_URL) % video_id, @@ -120,7 +120,7 @@ class WPPilotIE(WPPilotBaseIE): data=json.dumps({ 'channelId': video_id, 't': stream_token, - }).encode('utf-8')) + }).encode()) if try_get(close, lambda x: x['data']['status']) == 'ok': return self.url_result(url, ie=WPPilotIE.ie_key()) diff --git a/yt_dlp/extractor/wsj.py b/yt_dlp/extractor/wsj.py index 35fe303..b6b656f 100644 --- a/yt_dlp/extractor/wsj.py +++ b/yt_dlp/extractor/wsj.py @@ -2,6 +2,7 @@ from .common import InfoExtractor from ..utils import ( float_or_none, int_or_none, + join_nonempty, unified_strdate, ) @@ -76,7 +77,7 @@ class WSJIE(InfoExtractor): tbr = int_or_none(v.get('bitrate')) formats.append({ 'url': mp4_url, - 'format_id': 'http' + ('-%d' % tbr if tbr else ''), + 'format_id': join_nonempty('http', tbr), 'tbr': tbr, 'width': int_or_none(v.get('width')), 'height': int_or_none(v.get('height')), @@ -108,7 +109,7 @@ class WSJArticleIE(InfoExtractor): 'upload_date': '20170221', 'uploader_id': 'ralcaraz', 'title': 'Bao Bao the Panda Leaves for China', - } + }, } def _real_extract(self, url): @@ -117,4 +118,4 @@ class WSJArticleIE(InfoExtractor): video_id = self._search_regex( 
r'(?:id=["\']video|video-|iframe\.html\?guid=|data-src=["\'])([a-fA-F0-9-]{36})', webpage, 'video id') - return self.url_result('wsj:%s' % video_id, WSJIE.ie_key(), video_id) + return self.url_result(f'wsj:{video_id}', WSJIE.ie_key(), video_id) diff --git a/yt_dlp/extractor/wwe.py b/yt_dlp/extractor/wwe.py index 9bbd477..3b8197a 100644 --- a/yt_dlp/extractor/wwe.py +++ b/yt_dlp/extractor/wwe.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( try_get, unescapeHTML, @@ -17,7 +16,7 @@ class WWEBaseIE(InfoExtractor): } def _extract_entry(self, data, url, video_id=None): - video_id = compat_str(video_id or data['nid']) + video_id = str(video_id or data['nid']) title = data['title'] formats = self._extract_m3u8_formats( @@ -69,7 +68,7 @@ class WWEIE(WWEBaseIE): 'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018', 'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67', 'thumbnail': r're:^https?://.*\.jpg$', - } + }, }, { 'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018', 'only_matching': True, @@ -111,7 +110,7 @@ class WWEPlaylistIE(WWEBaseIE): @classmethod def suitable(cls, url): - return False if WWEIE.suitable(url) else super(WWEPlaylistIE, cls).suitable(url) + return False if WWEIE.suitable(url) else super().suitable(url) def _real_extract(self, url): display_id = self._match_id(url) diff --git a/yt_dlp/extractor/wykop.py b/yt_dlp/extractor/wykop.py index 1d29cc8..2ae0a2a 100644 --- a/yt_dlp/extractor/wykop.py +++ b/yt_dlp/extractor/wykop.py @@ -209,7 +209,7 @@ class WykopPostIE(WykopBaseExtractor): 'playlist_mincount': 15, 'params': { 'flat_playlist': True, - } + }, }] @classmethod diff --git a/yt_dlp/extractor/xanimu.py b/yt_dlp/extractor/xanimu.py index e0b7bf9..b489358 100644 --- a/yt_dlp/extractor/xanimu.py +++ b/yt_dlp/extractor/xanimu.py @@ -16,11 +16,11 @@ class XanimuIE(InfoExtractor): 'thumbnail': 
'https://xanimu.com/storage/2020/09/the-princess-and-the-frog-hentai.jpg', 'description': r're:^Enjoy The Princess \+ The Frog Hentai', 'duration': 207.0, - 'age_limit': 18 - } + 'age_limit': 18, + }, }, { 'url': 'https://xanimu.com/huge-expansion/', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -28,14 +28,15 @@ class XanimuIE(InfoExtractor): webpage = self._download_webpage(url, video_id) formats = [] - for format in ['videoHigh', 'videoLow']: - format_url = self._search_json(r'var\s+%s\s*=' % re.escape(format), webpage, format, - video_id, default=None, contains_pattern=r'[\'"]([^\'"]+)[\'"]') + for format_id in ['videoHigh', 'videoLow']: + format_url = self._search_json( + rf'var\s+{re.escape(format_id)}\s*=', webpage, format_id, + video_id, default=None, contains_pattern=r'[\'"]([^\'"]+)[\'"]') if format_url: formats.append({ 'url': format_url, - 'format_id': format, - 'quality': -2 if format.endswith('Low') else None, + 'format_id': format_id, + 'quality': -2 if format_id.endswith('Low') else None, }) return { @@ -47,5 +48,5 @@ class XanimuIE(InfoExtractor): 'description': self._html_search_meta('description', webpage, default=None), 'duration': int_or_none(self._search_regex(r'duration:\s*[\'"]([^\'"]+?)[\'"]', webpage, 'duration', fatal=False)), - 'age_limit': 18 + 'age_limit': 18, } diff --git a/yt_dlp/extractor/xboxclips.py b/yt_dlp/extractor/xboxclips.py index 235b567..d726e62 100644 --- a/yt_dlp/extractor/xboxclips.py +++ b/yt_dlp/extractor/xboxclips.py @@ -21,7 +21,7 @@ class XboxClipsIE(InfoExtractor): 'filesize_approx': 26800000, 'upload_date': '20140807', 'duration': 56, - } + }, }, { 'url': 'https://gameclips.io/iAbdulElah/074a69a9-5faf-46aa-b93b-9909c1720325', 'only_matching': True, @@ -32,7 +32,7 @@ class XboxClipsIE(InfoExtractor): if '/video.php' in url: qs = parse_qs(url) - url = 'https://gameclips.io/%s/%s' % (qs['gamertag'][0], qs['vid'][0]) + url = 'https://gameclips.io/{}/{}'.format(qs['gamertag'][0], 
qs['vid'][0]) webpage = self._download_webpage(url, video_id) info = self._parse_html5_media_entries(url, webpage, video_id)[0] diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 0b3a620..c965c30 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -2,7 +2,6 @@ import itertools import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, clean_html, @@ -22,14 +21,14 @@ from ..utils import ( class XHamsterIE(InfoExtractor): _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)' - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) https?:// - (?:[^/?#]+\.)?%s/ + (?:[^/?#]+\.)?{_DOMAINS}/ (?: movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html| videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+) ) - ''' % _DOMAINS + ''' _TESTS = [{ 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445', 'md5': '34e1ab926db5dc2750fed9e1f34304bb', @@ -177,7 +176,7 @@ class XHamsterIE(InfoExtractor): continue format_urls.add(format_url) formats.append({ - 'format_id': '%s-%s' % (format_id, quality), + 'format_id': f'{format_id}-{quality}', 'url': format_url, 'ext': determine_ext(format_url, 'mp4'), 'height': get_height(quality), @@ -228,7 +227,7 @@ class XHamsterIE(InfoExtractor): or str_or_none(standard_format.get('label')) or '') formats.append({ - 'format_id': '%s-%s' % (format_id, quality), + 'format_id': f'{format_id}-{quality}', 'url': standard_url, 'ext': ext, 'height': get_height(quality), @@ -245,7 +244,7 @@ class XHamsterIE(InfoExtractor): if not isinstance(c, dict): continue c_name = c.get('name') - if isinstance(c_name, compat_str): + if isinstance(c_name, str): categories.append(c_name) else: categories = None @@ -258,7 +257,7 @@ class XHamsterIE(InfoExtractor): 'description': video.get('description'), 'timestamp': int_or_none(video.get('created')), 'uploader': try_get( - video, lambda x: 
x['author']['name'], compat_str), + video, lambda x: x['author']['name'], str), 'uploader_url': uploader_url, 'uploader_id': uploader_url.split('/')[-1] if uploader_url else None, 'thumbnail': video.get('thumbURL'), @@ -372,7 +371,7 @@ class XHamsterIE(InfoExtractor): class XHamsterEmbedIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/?#]+\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS + _VALID_URL = rf'https?://(?:[^/?#]+\.)?{XHamsterIE._DOMAINS}/xembed\.php\?video=(?P<id>\d+)' _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1'] _TEST = { 'url': 'http://xhamster.com/xembed.php?video=3328539', @@ -385,7 +384,7 @@ class XHamsterEmbedIE(InfoExtractor): 'uploader': 'ManyakisArt', 'duration': 5, 'age_limit': 18, - } + }, } def _real_extract(self, url): @@ -394,14 +393,14 @@ class XHamsterEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id), + rf'href="(https?://xhamster\.com/(?:movies/{video_id}/[^"]*\.html|videos/[^/]*-{video_id})[^"]*)"', webpage, 'xhamster url', default=None) if not video_url: - vars = self._parse_json( + player_vars = self._parse_json( self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'), video_id) - video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl')) + video_url = dict_get(player_vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl')) return self.url_result(video_url, 'XHamster') @@ -441,7 +440,7 @@ class XHamsterUserIE(InfoExtractor): next_page_url = f'https://xhamster.com/{prefix}/{user_id}/{suffix}/1' for pagenum in itertools.count(1): page = self._download_webpage( - next_page_url, user_id, 'Downloading page %s' % pagenum) + next_page_url, user_id, f'Downloading page {pagenum}') for video_tag in re.findall( 
r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)', page): diff --git a/yt_dlp/extractor/xiaohongshu.py b/yt_dlp/extractor/xiaohongshu.py index faad9d9..00c6ed7 100644 --- a/yt_dlp/extractor/xiaohongshu.py +++ b/yt_dlp/extractor/xiaohongshu.py @@ -25,7 +25,7 @@ class XiaoHongShuIE(InfoExtractor): 'tags': ['今日快乐今日发', '吃货薯看这里', '香妃蛋糕', '小五卷蛋糕', '新手蛋糕卷'], 'duration': 101.726, 'thumbnail': r're:https?://sns-webpic-qc\.xhscdn\.com/\d+/[a-z0-9]+/[\w]+', - } + }, }] def _real_extract(self, url): @@ -51,7 +51,7 @@ class XiaoHongShuIE(InfoExtractor): 'tbr': ('avgBitrate', {int_or_none}), 'format': ('qualityType', {str}), 'filesize': ('size', {int_or_none}), - 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}) + 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}), }) formats.extend(traverse_obj(info, (('mediaUrl', ('backupUrls', ...)), { diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py index c98c8a4..e900a4a 100644 --- a/yt_dlp/extractor/ximalaya.py +++ b/yt_dlp/extractor/ximalaya.py @@ -22,7 +22,7 @@ class XimalayaIE(XimalayaBaseIE): 'uploader_id': '61425525', 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/', 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白', - 'description': "contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。", + 'description': 'contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。', 'thumbnail': r're:^https?://.*\.jpg', 'thumbnails': [ { @@ -33,14 +33,14 @@ class XimalayaIE(XimalayaBaseIE): 'name': 'cover_url_142', 'url': r're:^https?://.*\.jpg', 'width': 180, - 'height': 180 - } + 'height': 180, + }, ], 'categories': ['其他'], 'duration': 93, 'view_count': int, 'like_count': int, - } + }, }, { 'url': 'http://m.ximalaya.com/61425525/sound/47740352/', @@ -51,7 +51,7 @@ class XimalayaIE(XimalayaBaseIE): 'uploader_id': '61425525', 'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/', 'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白', - 'description': 
"contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。", + 'description': 'contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。', 'thumbnail': r're:^https?://.*\.jpg', 'thumbnails': [ { @@ -62,35 +62,35 @@ class XimalayaIE(XimalayaBaseIE): 'name': 'cover_url_142', 'url': r're:^https?://.*\.jpg', 'width': 180, - 'height': 180 - } + 'height': 180, + }, ], 'categories': ['人文'], 'duration': 93, 'view_count': int, 'like_count': int, - } - } + }, + }, ] def _real_extract(self, url): scheme = 'https' if url.startswith('https') else 'http' audio_id = self._match_id(url) - audio_info_file = '%s://m.ximalaya.com/tracks/%s.json' % (scheme, audio_id) - audio_info = self._download_json(audio_info_file, audio_id, - 'Downloading info json %s' % audio_info_file, - 'Unable to download info file') + audio_info_file = f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json' + audio_info = self._download_json( + audio_info_file, audio_id, + f'Downloading info json {audio_info_file}', 'Unable to download info file') formats = [{ 'format_id': f'{bps}k', 'url': audio_info[k], 'abr': bps, - 'vcodec': 'none' + 'vcodec': 'none', } for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)] thumbnails = [] - for k in audio_info.keys(): + for k in audio_info: # cover pics kyes like: cover_url', 'cover_url_142' if k.startswith('cover_url'): thumbnail = {'name': k, 'url': audio_info[k]} diff --git a/yt_dlp/extractor/xinpianchang.py b/yt_dlp/extractor/xinpianchang.py index bd67e8b..1084991 100644 --- a/yt_dlp/extractor/xinpianchang.py +++ b/yt_dlp/extractor/xinpianchang.py @@ -25,7 +25,7 @@ class XinpianchangIE(InfoExtractor): 'uploader': '正时文创', 'uploader_id': '10357277', 'categories': ['宣传片', '国家城市', '广告', '其他'], - 'tags': ['北京冬奥会', '冰墩墩', '再见', '告别', '冰墩墩哭了', '感动', '闭幕式', '熄火'] + 'tags': ['北京冬奥会', '冰墩墩', '再见', '告别', '冰墩墩哭了', '感动', '闭幕式', '熄火'], }, }, { 'url': 'https://www.xinpianchang.com/a11762904', @@ -39,7 +39,7 @@ class XinpianchangIE(InfoExtractor): 
'uploader': '精品动画', 'uploader_id': '10858927', 'categories': ['动画', '三维CG'], - 'tags': ['France Télévisions', '法国3台', '蠢萌', '冬奥会'] + 'tags': ['France Télévisions', '法国3台', '蠢萌', '冬奥会'], }, }, { 'url': 'https://www.xinpianchang.com/a11779743?from=IndexPick&part=%E7%BC%96%E8%BE%91%E7%B2%BE%E9%80%89&index=2', diff --git a/yt_dlp/extractor/xminus.py b/yt_dlp/extractor/xminus.py index 37e3104..af9cf40 100644 --- a/yt_dlp/extractor/xminus.py +++ b/yt_dlp/extractor/xminus.py @@ -26,7 +26,7 @@ class XMinusIE(InfoExtractor): 'filesize_approx': 5900000, 'view_count': int, 'description': 'md5:03238c5b663810bc79cf42ef3c03e371', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py index 74d4f04..a1b7e75 100644 --- a/yt_dlp/extractor/xnxx.py +++ b/yt_dlp/extractor/xnxx.py @@ -41,7 +41,7 @@ class XNXXIE(InfoExtractor): def get(meta, default=NO_DEFAULT, fatal=True): return self._search_regex( - r'set%s\s*\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % meta, + rf'set{meta}\s*\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, meta, default=default, fatal=fatal, group='value') title = self._og_search_title( diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py index 322e865..f7b4832 100644 --- a/yt_dlp/extractor/xstream.py +++ b/yt_dlp/extractor/xstream.py @@ -41,8 +41,7 @@ class XstreamIE(InfoExtractor): def _extract_video_info(self, partner_id, video_id): data = self._download_xml( - 'http://frontend.xstream.dk/%s/feed/video/?platform=web&id=%s' - % (partner_id, video_id), + f'http://frontend.xstream.dk/{partner_id}/feed/video/?platform=web&id={video_id}', video_id) NS_MAP = { @@ -71,7 +70,7 @@ class XstreamIE(InfoExtractor): if mobj: formats.append({ 'url': mobj.group('url'), - 'play_path': 'mp4:%s' % mobj.group('playpath'), + 'play_path': 'mp4:{}'.format(mobj.group('playpath')), 'app': mobj.group('app'), 'ext': 'flv', 'tbr': tbr, diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index 
6b16ac2..e7d43ba 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, clean_html, @@ -32,7 +32,7 @@ class XVideosIE(InfoExtractor): 'duration': 1238, 'age_limit': 18, 'thumbnail': r're:^https://cdn\d+-pic.xvideos-cdn.com/.+\.jpg', - } + }, }, { # Broken HLS formats 'url': 'https://www.xvideos.com/video65982001/what_s_her_name', @@ -44,7 +44,7 @@ class XVideosIE(InfoExtractor): 'duration': 120, 'age_limit': 18, 'thumbnail': r're:^https://cdn\d+-pic.xvideos-cdn.com/.+\.jpg', - } + }, }, { 'url': 'https://flashservice.xvideos.com/embedframe/4588838', 'only_matching': True, @@ -56,40 +56,40 @@ class XVideosIE(InfoExtractor): 'only_matching': True, }, { 'url': 'http://xvideos.com/video4588838/biker_takes_his_girl', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://xvideos.com/video4588838/biker_takes_his_girl', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://xvideos.es/video4588838/biker_takes_his_girl', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.xvideos.es/video4588838/biker_takes_his_girl', - 'only_matching': True + 'only_matching': True, }, { 'url': 'http://xvideos.es/video4588838/biker_takes_his_girl', - 'only_matching': True + 'only_matching': True, }, { 'url': 'http://www.xvideos.es/video4588838/biker_takes_his_girl', - 'only_matching': True + 'only_matching': True, }, { 'url': 'http://fr.xvideos.com/video4588838/biker_takes_his_girl', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://fr.xvideos.com/video4588838/biker_takes_his_girl', - 'only_matching': True + 'only_matching': True, }, { 'url': 'http://it.xvideos.com/video4588838/biker_takes_his_girl', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://it.xvideos.com/video4588838/biker_takes_his_girl', - 
'only_matching': True + 'only_matching': True, }, { 'url': 'http://de.xvideos.com/video4588838/biker_takes_his_girl', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://flashservice.xvideos.com/embedframe/ucuvbkfda4e', 'only_matching': True, @@ -101,7 +101,7 @@ class XVideosIE(InfoExtractor): 'only_matching': True, }, { 'url': 'https://xvideos.es/video.ucuvbkfda4e/a_beautiful_red-haired_stranger_was_refused_but_still_came_to_my_room_for_sex', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -110,7 +110,7 @@ class XVideosIE(InfoExtractor): mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage) if mobj: - raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {clean_html(mobj.group(1))}', expected=True) title = self._html_search_regex( (r'<title>(?P<title>.+?)\s+-\s+XVID', @@ -121,7 +121,7 @@ class XVideosIE(InfoExtractor): thumbnails = [] for preference, thumbnail in enumerate(('', '169')): thumbnail_url = self._search_regex( - r'setThumbUrl%s\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1' % thumbnail, + rf'setThumbUrl{thumbnail}\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1', webpage, 'thumbnail', default=None, group='thumbnail') if thumbnail_url: thumbnails.append({ @@ -137,7 +137,7 @@ class XVideosIE(InfoExtractor): formats = [] - video_url = compat_urllib_parse_unquote(self._search_regex( + video_url = urllib.parse.unquote(self._search_regex( r'flv_url=(.+?)&', webpage, 'video URL', default='')) if video_url: formats.append({ @@ -157,7 +157,7 @@ class XVideosIE(InfoExtractor): elif format_id in ('urllow', 'urlhigh'): formats.append({ 'url': format_url, - 'format_id': '%s-%s' % (determine_ext(format_url, 'mp4'), format_id[3:]), + 'format_id': '{}-{}'.format(determine_ext(format_url, 'mp4'), format_id[3:]), 
'quality': -2 if format_id.endswith('low') else None, }) @@ -184,7 +184,7 @@ class XVideosQuickiesIE(InfoExtractor): 'age_limit': 18, 'duration': 81, 'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg', - } + }, }, { 'url': 'https://www.xvideos.com/profiles/lili_love#quickies/a/ipphaob6fd1', 'md5': '5340938aac6b46e19ebdd1d84535862e', @@ -195,7 +195,7 @@ class XVideosQuickiesIE(InfoExtractor): 'age_limit': 18, 'duration': 56, 'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg', - } + }, }, { 'url': 'https://www.xvideos.com/amateur-channels/lili_love#quickies/a/hfmffmd7661', 'md5': '92428518bbabcb4c513e55922e022491', @@ -206,7 +206,7 @@ class XVideosQuickiesIE(InfoExtractor): 'age_limit': 18, 'duration': 9, 'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg', - } + }, }, { 'url': 'https://www.xvideos.com/amateur-channels/wifeluna#quickies/a/47258683', 'md5': '16e322a93282667f1963915568f782c1', @@ -217,7 +217,7 @@ class XVideosQuickiesIE(InfoExtractor): 'age_limit': 18, 'duration': 16, 'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/xxxymovies.py b/yt_dlp/extractor/xxxymovies.py index aa6c84d..003af1d 100644 --- a/yt_dlp/extractor/xxxymovies.py +++ b/yt_dlp/extractor/xxxymovies.py @@ -21,7 +21,7 @@ class XXXYMoviesIE(InfoExtractor): 'like_count': int, 'dislike_count': int, 'age_limit': 18, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py index 24148a0..35e7120 100644 --- a/yt_dlp/extractor/yahoo.py +++ b/yt_dlp/extractor/yahoo.py @@ -8,6 +8,7 @@ from ..utils import ( ExtractorError, clean_html, int_or_none, + join_nonempty, mimetype2ext, parse_iso8601, traverse_obj, @@ -70,7 +71,7 @@ class YahooIE(InfoExtractor): 'duration': 128, 'timestamp': 1385722202, 'upload_date': '20131129', - } + }, }, { 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html', 'md5': 
'2a9752f74cb898af5d1083ea9f661b58', @@ -177,7 +178,7 @@ class YahooIE(InfoExtractor): def _extract_yahoo_video(self, video_id, country): video = self._download_json( - 'https://%s.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["%s"]' % (country, video_id), + f'https://{country}.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["{video_id}"]', video_id, 'Downloading video JSON metadata')[0] title = video['title'] @@ -193,7 +194,7 @@ class YahooIE(InfoExtractor): for fmt in fmts: media_obj = self._download_json( 'https://video-api.yql.yahoo.com/v1/video/sapi/streams/' + video_id, - video_id, 'Downloading %s JSON metadata' % fmt, + video_id, f'Downloading {fmt} JSON metadata', headers=self.geo_verification_headers(), query={ 'format': fmt, 'region': country.upper(), @@ -213,7 +214,7 @@ class YahooIE(InfoExtractor): tbr = int_or_none(s.get('bitrate')) formats.append({ 'url': s_url, - 'format_id': fmt + ('-%d' % tbr if tbr else ''), + 'format_id': join_nonempty(fmt, tbr), 'width': int_or_none(s.get('width')), 'height': int_or_none(s.get('height')), 'tbr': tbr, @@ -277,9 +278,9 @@ class YahooIE(InfoExtractor): country = country.split('-')[0] items = self._download_json( - 'https://%s.yahoo.com/caas/content/article' % country, display_id, + f'https://{country}.yahoo.com/caas/content/article', display_id, 'Downloading content JSON metadata', query={ - 'url': url + 'url': url, })['items'][0] item = items['data']['partnerData'] @@ -327,7 +328,7 @@ class YahooSearchIE(SearchInfoExtractor): def _search_results(self, query): for pagenum in itertools.count(0): - result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (urllib.parse.quote_plus(query), pagenum * 30) + result_url = f'http://video.search.yahoo.com/search/?p={urllib.parse.quote_plus(query)}&fr=screen&o=js&gs=0&b={pagenum * 30}' info = self._download_json(result_url, query, note='Downloading results page ' + str(pagenum + 1)) yield from 
(self.url_result(result['rurl']) for result in info['results']) @@ -354,7 +355,7 @@ class YahooJapanNewsIE(InfoExtractor): }, }, { 'url': 'https://news.yahoo.co.jp/feature/1356', - 'only_matching': True + 'only_matching': True, }] def _extract_formats(self, json_data, content_id): @@ -371,12 +372,13 @@ class YahooJapanNewsIE(InfoExtractor): url, content_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) else: + bitrate = int_or_none(vid.get('bitrate')) formats.append({ 'url': url, - 'format_id': f'http-{vid.get("bitrate")}', + 'format_id': join_nonempty('http', bitrate), 'height': int_or_none(vid.get('height')), 'width': int_or_none(vid.get('width')), - 'tbr': int_or_none(vid.get('bitrate')), + 'tbr': bitrate, }) self._remove_duplicate_formats(formats) diff --git a/yt_dlp/extractor/yandexdisk.py b/yt_dlp/extractor/yandexdisk.py index d5eecbd..3214816 100644 --- a/yt_dlp/extractor/yandexdisk.py +++ b/yt_dlp/extractor/yandexdisk.py @@ -5,6 +5,7 @@ from ..utils import ( determine_ext, float_or_none, int_or_none, + join_nonempty, mimetype2ext, try_get, urljoin, @@ -102,7 +103,7 @@ class YandexDiskIE(InfoExtractor): 'format_id': 'source', 'ext': determine_ext(title, meta.get('ext') or mimetype2ext(meta.get('mime_type')) or 'mp4'), 'quality': 1, - 'filesize': int_or_none(meta.get('size')) + 'filesize': int_or_none(meta.get('size')), }) for video in (video_streams.get('videos') or []): @@ -116,12 +117,9 @@ class YandexDiskIE(InfoExtractor): else: size = video.get('size') or {} height = int_or_none(size.get('height')) - format_id = 'hls' - if height: - format_id += '-%dp' % height formats.append({ 'ext': 'mp4', - 'format_id': format_id, + 'format_id': join_nonempty('hls', height and f'{height}p'), 'height': height, 'protocol': 'm3u8_native', 'url': format_url, diff --git a/yt_dlp/extractor/yandexmusic.py b/yt_dlp/extractor/yandexmusic.py index acfe69b..12cc5ca 100644 --- a/yt_dlp/extractor/yandexmusic.py +++ b/yt_dlp/extractor/yandexmusic.py @@ -2,7 +2,6 @@ import 
hashlib import itertools from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, float_or_none, @@ -35,19 +34,19 @@ class YandexMusicBaseIE(InfoExtractor): expected=True) def _download_webpage_handle(self, *args, **kwargs): - webpage = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs) + webpage = super()._download_webpage_handle(*args, **kwargs) if 'Нам очень жаль, но запросы, поступившие с вашего IP-адреса, похожи на автоматические.' in webpage: self._raise_captcha() return webpage def _download_json(self, *args, **kwargs): - response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs) + response = super()._download_json(*args, **kwargs) self._handle_error(response) return response def _call_api(self, ep, tld, url, item_id, note, query): return self._download_json( - 'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep), + f'https://music.yandex.{tld}/handlers/{ep}.jsx', item_id, note, fatal=False, headers={ @@ -61,7 +60,7 @@ class YandexMusicBaseIE(InfoExtractor): class YandexMusicTrackIE(YandexMusicBaseIE): IE_NAME = 'yandexmusic:track' IE_DESC = 'Яндекс.Музыка - Трек' - _VALID_URL = r'%s/album/(?P<album_id>\d+)/track/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE + _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/album/(?P<album_id>\d+)/track/(?P<id>\d+)' _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508/track/4878838', @@ -110,19 +109,19 @@ class YandexMusicTrackIE(YandexMusicBaseIE): track = self._call_api( 'track', tld, url, track_id, 'Downloading track JSON', - {'track': '%s:%s' % (track_id, album_id)})['track'] + {'track': f'{track_id}:{album_id}'})['track'] track_title = track['title'] download_data = self._download_json( - 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id), + 
f'https://music.yandex.ru/api/v2.1/handlers/track/{track_id}:{album_id}/web-album_track-track-track-main/download/m', track_id, 'Downloading track location url JSON', query={'hq': 1}, headers={'X-Retpath-Y': url}) fd_data = self._download_json( download_data['src'], track_id, 'Downloading track location JSON', query={'format': 'json'}) - key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest() - f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id']) + key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode()).hexdigest() + f_url = 'http://{}/get-mp3/{}/{}?track-id={} '.format(fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id']) thumbnail = None cover_uri = track.get('albums', [{}])[0].get('coverUri') @@ -151,7 +150,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE): for element in decomposed: if isinstance(element, dict) and element.get('name'): parts.append(element['name']) - elif isinstance(element, compat_str): + elif isinstance(element, str): parts.append(element) return ''.join(parts) @@ -183,7 +182,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE): if track_artist: track_info.update({ 'artist': track_artist, - 'title': '%s - %s' % (track_artist, track_title), + 'title': f'{track_artist} - {track_title}', }) else: track_info['title'] = track_title @@ -194,14 +193,14 @@ class YandexMusicTrackIE(YandexMusicBaseIE): class YandexMusicPlaylistBaseIE(YandexMusicBaseIE): def _extract_tracks(self, source, item_id, url, tld): tracks = source['tracks'] - track_ids = [compat_str(track_id) for track_id in source['trackIds']] + track_ids = [str(track_id) for track_id in source['trackIds']] # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks, # missing tracks should be retrieved manually. 
if len(tracks) < len(track_ids): - present_track_ids = set([ - compat_str(track['id']) - for track in tracks if track.get('id')]) + present_track_ids = { + str(track['id']) + for track in tracks if track.get('id')} missing_track_ids = [ track_id for track_id in track_ids if track_id not in present_track_ids] @@ -215,10 +214,10 @@ class YandexMusicPlaylistBaseIE(YandexMusicBaseIE): assert missing_track_ids_req missing_tracks = self._call_api( 'track-entries', tld, url, item_id, - 'Downloading missing tracks JSON chunk %d' % (chunk_num + 1), { + f'Downloading missing tracks JSON chunk {chunk_num + 1}', { 'entries': ','.join(missing_track_ids_req), 'lang': tld, - 'external-domain': 'music.yandex.%s' % tld, + 'external-domain': f'music.yandex.{tld}', 'overembed': 'false', 'strict': 'true', }) @@ -245,7 +244,7 @@ class YandexMusicPlaylistBaseIE(YandexMusicBaseIE): if not album_id: continue entries.append(self.url_result( - 'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id), + f'http://music.yandex.ru/album/{album_id}/track/{track_id}', ie=YandexMusicTrackIE.ie_key(), video_id=track_id)) return entries @@ -253,7 +252,7 @@ class YandexMusicPlaylistBaseIE(YandexMusicBaseIE): class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): IE_NAME = 'yandexmusic:album' IE_DESC = 'Яндекс.Музыка - Альбом' - _VALID_URL = r'%s/album/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE + _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/album/(?P<id>\d+)' _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508', @@ -283,7 +282,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): @classmethod def suitable(cls, url): - return False if YandexMusicTrackIE.suitable(url) else super(YandexMusicAlbumIE, cls).suitable(url) + return False if YandexMusicTrackIE.suitable(url) else super().suitable(url) def _real_extract(self, url): mobj = self._match_valid_url(url) @@ -297,20 +296,20 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): entries = self._build_playlist([track for 
volume in album['volumes'] for track in volume]) title = album['title'] - artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str) + artist = try_get(album, lambda x: x['artists'][0]['name'], str) if artist: - title = '%s - %s' % (artist, title) + title = f'{artist} - {title}' year = album.get('year') if year: - title += ' (%s)' % year + title += f' ({year})' - return self.playlist_result(entries, compat_str(album['id']), title) + return self.playlist_result(entries, str(album['id']), title) class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): IE_NAME = 'yandexmusic:playlist' IE_DESC = 'Яндекс.Музыка - Плейлист' - _VALID_URL = r'%s/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' % YandexMusicBaseIE._VALID_URL_BASE + _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)' _TESTS = [{ 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245', @@ -348,7 +347,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): 'kinds': playlist_id, 'light': 'true', 'lang': tld, - 'external-domain': 'music.yandex.%s' % tld, + 'external-domain': f'music.yandex.{tld}', 'overembed': 'false', })['playlist'] @@ -356,7 +355,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE): return self.playlist_result( self._build_playlist(tracks), - compat_str(playlist_id), + str(playlist_id), playlist.get('title'), playlist.get('description')) @@ -364,14 +363,14 @@ class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE): def _call_artist(self, tld, url, artist_id): return self._call_api( 'artist', tld, url, artist_id, - 'Downloading artist %s JSON' % self._ARTIST_WHAT, { + f'Downloading artist {self._ARTIST_WHAT} JSON', { 'artist': artist_id, 'what': self._ARTIST_WHAT, 'sort': self._ARTIST_SORT or '', 'dir': '', 'period': '', 'lang': tld, - 'external-domain': 'music.yandex.%s' % tld, + 'external-domain': f'music.yandex.{tld}', 'overembed': 'false', }) @@ -381,7 +380,7 @@ class 
YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE): artist_id = mobj.group('id') data = self._call_artist(tld, url, artist_id) tracks = self._extract_tracks(data, artist_id, url, tld) - title = try_get(data, lambda x: x['artist']['name'], compat_str) + title = try_get(data, lambda x: x['artist']['name'], str) return self.playlist_result( self._build_playlist(tracks), artist_id, title) @@ -389,7 +388,7 @@ class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE): class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE): IE_NAME = 'yandexmusic:artist:tracks' IE_DESC = 'Яндекс.Музыка - Артист - Треки' - _VALID_URL = r'%s/artist/(?P<id>\d+)/tracks' % YandexMusicBaseIE._VALID_URL_BASE + _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/artist/(?P<id>\d+)/tracks' _TESTS = [{ 'url': 'https://music.yandex.ru/artist/617526/tracks', @@ -410,8 +409,8 @@ class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE): artist_id = mobj.group('id') data = self._call_artist(tld, url, artist_id) tracks = self._extract_tracks(data, artist_id, url, tld) - artist = try_get(data, lambda x: x['artist']['name'], compat_str) - title = '%s - %s' % (artist or artist_id, 'Треки') + artist = try_get(data, lambda x: x['artist']['name'], str) + title = '{} - {}'.format(artist or artist_id, 'Треки') return self.playlist_result( self._build_playlist(tracks), artist_id, title) @@ -419,7 +418,7 @@ class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE): class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE): IE_NAME = 'yandexmusic:artist:albums' IE_DESC = 'Яндекс.Музыка - Артист - Альбомы' - _VALID_URL = r'%s/artist/(?P<id>\d+)/albums' % YandexMusicBaseIE._VALID_URL_BASE + _VALID_URL = rf'{YandexMusicBaseIE._VALID_URL_BASE}/artist/(?P<id>\d+)/albums' _TESTS = [{ 'url': 'https://music.yandex.ru/artist/617526/albums', @@ -447,8 +446,8 @@ class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE): if not album_id: continue entries.append(self.url_result( - 'http://music.yandex.ru/album/%s' % album_id, + 
f'http://music.yandex.ru/album/{album_id}', ie=YandexMusicAlbumIE.ie_key(), video_id=album_id)) - artist = try_get(data, lambda x: x['artist']['name'], compat_str) - title = '%s - %s' % (artist or artist_id, 'Альбомы') + artist = try_get(data, lambda x: x['artist']['name'], str) + title = '{} - {}'.format(artist or artist_id, 'Альбомы') return self.playlist_result(entries, artist_id, title) diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py index 95a9446..cdd32c5 100644 --- a/yt_dlp/extractor/yandexvideo.py +++ b/yt_dlp/extractor/yandexvideo.py @@ -89,10 +89,10 @@ class YandexVideoIE(InfoExtractor): title views_count } -}''' % video_id).encode(), fatal=False)), lambda x: x['player']['content']) +}''' % video_id).encode(), fatal=False)), lambda x: x['player']['content']) # noqa: UP031 if not player or player.get('error'): player = self._download_json( - 'https://frontend.vh.yandex.ru/v23/player/%s.json' % video_id, + f'https://frontend.vh.yandex.ru/v23/player/{video_id}.json', video_id, query={ 'stream_options': 'hires', 'disable_trackings': 1, @@ -179,10 +179,10 @@ class YandexVideoPreviewIE(InfoExtractor): }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) data_raw = self._search_regex(r'window.Ya.__inline_params__\s*=\s*JSON.parse\(\'([^"]+?\\u0022video\\u0022:[^"]+?})\'\);', webpage, 'data_raw') - data_json = self._parse_json(data_raw, id, transform_source=lowercase_escape) + data_json = self._parse_json(data_raw, video_id, transform_source=lowercase_escape) return self.url_result(data_json['video']['url']) @@ -196,7 +196,7 @@ class ZenYandexIE(InfoExtractor): 'title': 'ВОТ ЭТО Focus. 
Деды Морозы на гидроциклах', 'description': 'md5:8684912f6086f298f8078d4af0e8a600', 'thumbnail': 're:^https://avatars.dzeninfra.ru/', - 'uploader': 'AcademeG DailyStream' + 'uploader': 'AcademeG DailyStream', }, 'params': { 'skip_download': 'm3u8', diff --git a/yt_dlp/extractor/yapfiles.py b/yt_dlp/extractor/yapfiles.py index d6024d9..8d89d1d 100644 --- a/yt_dlp/extractor/yapfiles.py +++ b/yt_dlp/extractor/yapfiles.py @@ -10,7 +10,7 @@ from ..utils import ( class YapFilesIE(InfoExtractor): _WORKING = False _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)' - _VALID_URL = r'https?:%s' % _YAPFILES_URL + _VALID_URL = rf'https?:{_YAPFILES_URL}' _EMBED_REGEX = [rf'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?{_YAPFILES_URL}.*?)\1'] _TESTS = [{ # with hd @@ -42,7 +42,7 @@ class YapFilesIE(InfoExtractor): 'player url', default=None, group='url') if not player_url: - player_url = 'http://api.yapfiles.ru/load/%s/' % video_id + player_url = f'http://api.yapfiles.ru/load/{video_id}/' query = { 'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff', 'type': 'json', @@ -58,7 +58,7 @@ class YapFilesIE(InfoExtractor): if title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''): raise ExtractorError( - 'Video %s has been removed' % video_id, expected=True) + f'Video {video_id} has been removed', expected=True) playlist = self._download_json( playlist_url, video_id)['player']['main'] diff --git a/yt_dlp/extractor/yappy.py b/yt_dlp/extractor/yappy.py index 5ce647e..171e0f1 100644 --- a/yt_dlp/extractor/yappy.py +++ b/yt_dlp/extractor/yappy.py @@ -27,7 +27,7 @@ class YappyIE(InfoExtractor): 'categories': ['Образование и наука', 'Лайфхак', 'Технологии', 'Арт/искусство'], 'repost_count': int, 'uploader': 'YAPPY', - } + }, }, { 'url': 'https://yappy.media/video/3862451954ad4bd58ae2ccefddb0bd33', 'info_dict': { @@ -43,7 +43,7 @@ class YappyIE(InfoExtractor): 'uploader': 'LENA SHTURMAN', 'upload_date': '20230126', 'thumbnail': 
'https://cdn-st.ritm.media/static/pic/user_thumbnails/6e76bb4bbad640b6/9ec84c115b2b1967/1674716171.jpg', - } + }, }] def _real_extract(self, url): @@ -64,13 +64,13 @@ class YappyIE(InfoExtractor): 'url': media_url, 'ext': 'mp4', 'format_note': 'Watermarked' if has_watermark else None, - 'preference': -10 if has_watermark else None + 'preference': -10 if has_watermark else None, }] if media_url else [] if has_watermark: formats.append({ 'url': media_url.replace('-wm.mp4', '.mp4'), - 'ext': 'mp4' + 'ext': 'mp4', }) audio_link = traverse_obj(media_data, ('audio', 'link')) @@ -79,7 +79,7 @@ class YappyIE(InfoExtractor): 'url': audio_link, 'ext': 'mp3', 'acodec': 'mp3', - 'vcodec': 'none' + 'vcodec': 'none', }) return { @@ -97,7 +97,7 @@ class YappyIE(InfoExtractor): 'uploader': traverse_obj(media_data, ('creator', 'firstName')), 'uploader_id': traverse_obj(media_data, ('creator', ('uuid', 'nickname')), get_all=False), 'categories': traverse_obj(media_data, ('categories', ..., 'name')) or None, - 'repost_count': int_or_none(media_data.get('sharingCount')) + 'repost_count': int_or_none(media_data.get('sharingCount')), } diff --git a/yt_dlp/extractor/yle_areena.py b/yt_dlp/extractor/yle_areena.py index dd0e599..796f7f3 100644 --- a/yt_dlp/extractor/yle_areena.py +++ b/yt_dlp/extractor/yle_areena.py @@ -34,8 +34,8 @@ class YleAreenaIE(InfoExtractor): 'timestamp': 1543916210, 'subtitles': {'fin': [{'url': r're:^https?://', 'ext': 'srt'}]}, 'age_limit': 7, - 'webpage_url': 'https://areena.yle.fi/1-4371942' - } + 'webpage_url': 'https://areena.yle.fi/1-4371942', + }, }, { 'url': 'https://areena.yle.fi/1-2158940', @@ -55,8 +55,8 @@ class YleAreenaIE(InfoExtractor): 'timestamp': 1638448202, 'subtitles': {}, 'age_limit': 0, - 'webpage_url': 'https://areena.yle.fi/1-2158940' - } + 'webpage_url': 'https://areena.yle.fi/1-2158940', + }, }, { 'url': 'https://areena.yle.fi/1-64829589', @@ -83,7 +83,7 @@ class YleAreenaIE(InfoExtractor): video_id, headers={ 'origin': 
'https://areena.yle.fi', 'referer': 'https://areena.yle.fi/', - 'content-type': 'application/json' + 'content-type': 'application/json', }) # Example title: 'K1, J2: Pouchit | Modernit miehet' diff --git a/yt_dlp/extractor/youjizz.py b/yt_dlp/extractor/youjizz.py index cd12be5..f7ef222 100644 --- a/yt_dlp/extractor/youjizz.py +++ b/yt_dlp/extractor/youjizz.py @@ -18,7 +18,7 @@ class YouJizzIE(InfoExtractor): 'title': 'Zeichentrick 1', 'age_limit': 18, 'duration': 2874, - } + }, }, { 'url': 'http://www.youjizz.com/videos/-2189178.html', 'only_matching': True, diff --git a/yt_dlp/extractor/youku.py b/yt_dlp/extractor/youku.py index 1f3f98a..fa6b053 100644 --- a/yt_dlp/extractor/youku.py +++ b/yt_dlp/extractor/youku.py @@ -104,7 +104,7 @@ class YoukuIE(InfoExtractor): @staticmethod def get_ysuid(): - return '%d%s' % (int(time.time()), ''.join( + return '{}{}'.format(int(time.time()), ''.join( random.choices(string.ascii_letters, k=3))) def get_format_name(self, fm): @@ -273,7 +273,7 @@ class YoukuShowIE(InfoExtractor): continue _, new_entries = self._extract_entries( 'http://list.youku.com/show/episode', show_id, - note='Downloading playlist data page %d' % (idx + 1), + note=f'Downloading playlist data page {idx + 1}', query={ 'id': page_config['showid'], 'stage': reload_id, diff --git a/yt_dlp/extractor/younow.py b/yt_dlp/extractor/younow.py index b67cb2e..409ee75 100644 --- a/yt_dlp/extractor/younow.py +++ b/yt_dlp/extractor/younow.py @@ -1,7 +1,6 @@ import itertools from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, format_field, @@ -11,7 +10,7 @@ from ..utils import ( ) CDN_API_BASE = 'https://cdn.younow.com/php/api' -MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE +MOMENT_URL_FORMAT = f'{CDN_API_BASE}/moment/fetch/id=%s' class YouNowLiveIE(InfoExtractor): @@ -38,21 +37,20 @@ class YouNowLiveIE(InfoExtractor): def suitable(cls, url): return (False if YouNowChannelIE.suitable(url) or 
YouNowMomentIE.suitable(url) - else super(YouNowLiveIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): username = self._match_id(url) data = self._download_json( - 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' - % username, username) + f'https://api.younow.com/php/api/broadcast/info/curId=0/user={username}', username) if data.get('errorCode') != 0: raise ExtractorError(data['errorMsg'], expected=True) uploader = try_get( data, lambda x: x['user']['profileUrlString'], - compat_str) or username + str) or username return { 'id': uploader, @@ -63,13 +61,12 @@ class YouNowLiveIE(InfoExtractor): 'categories': data.get('tags'), 'uploader': uploader, 'uploader_id': data.get('userId'), - 'uploader_url': 'https://www.younow.com/%s' % username, + 'uploader_url': f'https://www.younow.com/{username}', 'creator': uploader, 'view_count': int_or_none(data.get('viewers')), 'like_count': int_or_none(data.get('likes')), 'formats': [{ - 'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s' - % (CDN_API_BASE, data['broadcastId'], data['userId']), + 'url': '{}/broadcast/videoPath/hls=1/broadcastId={}/channelId={}'.format(CDN_API_BASE, data['broadcastId'], data['userId']), 'ext': 'mp4', 'protocol': 'm3u8', }], @@ -83,18 +80,18 @@ def _extract_moment(item, fatal=True): return raise ExtractorError('Unable to extract moment id') - moment_id = compat_str(moment_id) + moment_id = str(moment_id) title = item.get('text') if not title: title = 'YouNow %s' % ( item.get('momentType') or item.get('titleType') or 'moment') - uploader = try_get(item, lambda x: x['owner']['name'], compat_str) + uploader = try_get(item, lambda x: x['owner']['name'], str) uploader_id = try_get(item, lambda x: x['owner']['userId']) uploader_url = format_field(uploader, None, 'https://www.younow.com/%s') - entry = { + return { 'extractor_key': 'YouNowMoment', 'id': moment_id, 'title': title, @@ -106,15 +103,12 @@ def _extract_moment(item, fatal=True): 
'uploader_id': str_or_none(uploader_id), 'uploader_url': uploader_url, 'formats': [{ - 'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8' - % (moment_id, moment_id), + 'url': f'https://hls.younow.com/momentsplaylists/live/{moment_id}/{moment_id}.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', }], } - return entry - class YouNowChannelIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel' @@ -122,7 +116,7 @@ class YouNowChannelIE(InfoExtractor): 'url': 'https://www.younow.com/its_Kateee_/channel', 'info_dict': { 'id': '14629760', - 'title': 'its_Kateee_ moments' + 'title': 'its_Kateee_ moments', }, 'playlist_mincount': 8, } @@ -133,9 +127,8 @@ class YouNowChannelIE(InfoExtractor): if created_before is None: break info = self._download_json( - '%s/moment/profile/channelId=%s/createdBefore=%d/records=20' - % (CDN_API_BASE, channel_id, created_before), username, - note='Downloading moments page %d' % page_num) + f'{CDN_API_BASE}/moment/profile/channelId={channel_id}/createdBefore={created_before}/records=20', + username, note=f'Downloading moments page {page_num}') items = info.get('items') if not items or not isinstance(items, list): break @@ -153,7 +146,7 @@ class YouNowChannelIE(InfoExtractor): for moment_id in moments: m = self._download_json( MOMENT_URL_FORMAT % moment_id, username, - note='Downloading %s moment JSON' % moment_id, + note=f'Downloading {moment_id} moment JSON', fatal=False) if m and isinstance(m, dict) and m.get('item'): entry = _extract_moment(m['item']) @@ -163,12 +156,12 @@ class YouNowChannelIE(InfoExtractor): def _real_extract(self, url): username = self._match_id(url) - channel_id = compat_str(self._download_json( - 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' - % username, username, note='Downloading user information')['userId']) + channel_id = str(self._download_json( + f'https://api.younow.com/php/api/broadcast/info/curId=0/user={username}', + username, note='Downloading 
user information')['userId']) return self.playlist_result( self._entries(username, channel_id), channel_id, - '%s moments' % username) + f'{username} moments') class YouNowMomentIE(InfoExtractor): @@ -193,7 +186,7 @@ class YouNowMomentIE(InfoExtractor): def suitable(cls, url): return (False if YouNowChannelIE.suitable(url) - else super(YouNowMomentIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): video_id = self._match_id(url) diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 0e047aa..4a00dfe 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -90,7 +90,7 @@ class YouPornIE(InfoExtractor): 'timestamp': 1606147564, 'title': 'Tinder In Real Life', 'view_count': int, - } + }, }] def _real_extract(self, url): @@ -126,7 +126,7 @@ class YouPornIE(InfoExtractor): for definition in get_format_data(definitions, 'mp4'): f = traverse_obj(definition, { 'url': 'videoUrl', - 'filesize': ('videoSize', {int_or_none}) + 'filesize': ('videoSize', {int_or_none}), }) height = int_or_none(definition.get('quality')) # Video URL's path looks like this: @@ -140,7 +140,7 @@ class YouPornIE(InfoExtractor): height = int(mobj.group('height')) bitrate = int(mobj.group('bitrate')) f.update({ - 'format_id': '%dp-%dk' % (height, bitrate), + 'format_id': f'{height}p-{bitrate}k', 'tbr': bitrate, }) f['height'] = height diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 54da4e3..18e0ee9 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4,6 +4,7 @@ import collections import copy import datetime as dt import enum +import functools import hashlib import itertools import json @@ -20,7 +21,6 @@ import urllib.parse from .common import InfoExtractor, SearchInfoExtractor from .openload import PhantomJSwrapper -from ..compat import functools from ..jsinterp import JSInterpreter from ..networking.exceptions import HTTPError, network_exceptions from ..utils import ( @@ 
-77,9 +77,9 @@ INNERTUBE_CLIENTS = { 'client': { 'clientName': 'WEB', 'clientVersion': '2.20220801.00.00', - } + }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 1 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, }, 'web_embedded': { 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', @@ -89,7 +89,7 @@ INNERTUBE_CLIENTS = { 'clientVersion': '1.20220731.00.00', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 56 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 56, }, 'web_music': { 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', @@ -98,7 +98,7 @@ INNERTUBE_CLIENTS = { 'client': { 'clientName': 'WEB_REMIX', 'clientVersion': '1.20220727.01.00', - } + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, }, @@ -108,7 +108,7 @@ INNERTUBE_CLIENTS = { 'client': { 'clientName': 'WEB_CREATOR', 'clientVersion': '1.20220726.00.00', - } + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, }, @@ -119,11 +119,11 @@ INNERTUBE_CLIENTS = { 'clientName': 'ANDROID', 'clientVersion': '19.09.37', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip' - } + 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip', + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'android_embedded': { 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw', @@ -132,11 +132,11 @@ INNERTUBE_CLIENTS = { 'clientName': 'ANDROID_EMBEDDED_PLAYER', 'clientVersion': '19.09.37', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip' + 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 55, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'android_music': { 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI', @@ -145,11 +145,11 @@ INNERTUBE_CLIENTS = { 'clientName': 'ANDROID_MUSIC', 'clientVersion': '6.42.52', 'androidSdkVersion': 30, - 'userAgent': 
'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip' - } + 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip', + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'android_creator': { 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8', @@ -158,11 +158,11 @@ INNERTUBE_CLIENTS = { 'clientName': 'ANDROID_CREATOR', 'clientVersion': '22.30.100', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip' + 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, # iOS clients have HLS live streams. Setting device model to get 60fps formats. # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 @@ -173,11 +173,11 @@ INNERTUBE_CLIENTS = { 'clientName': 'IOS', 'clientVersion': '19.09.3', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' - } + 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'ios_embedded': { 'INNERTUBE_CONTEXT': { @@ -185,11 +185,11 @@ INNERTUBE_CLIENTS = { 'clientName': 'IOS_MESSAGES_EXTENSION', 'clientVersion': '19.09.3', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' + 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 66, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'ios_music': { 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s', @@ -198,11 +198,11 @@ INNERTUBE_CLIENTS = { 'clientName': 
'IOS_MUSIC', 'clientVersion': '6.33.3', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' + 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'ios_creator': { 'INNERTUBE_CONTEXT': { @@ -210,11 +210,11 @@ INNERTUBE_CLIENTS = { 'clientName': 'IOS_CREATOR', 'clientVersion': '22.33.101', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' + 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 15, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, # mweb has 'ultralow' formats # See: https://github.com/yt-dlp/yt-dlp/pull/557 @@ -224,9 +224,9 @@ INNERTUBE_CLIENTS = { 'client': { 'clientName': 'MWEB', 'clientVersion': '2.20220801.00.00', - } + }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 2 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, }, # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) # See: https://github.com/zerodytrash/YouTube-Internal-Clients @@ -238,7 +238,7 @@ INNERTUBE_CLIENTS = { 'clientVersion': '2.0', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 85 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 85, }, # This client has pre-merged video+audio 720p/1080p streams 'mediaconnect': { @@ -248,7 +248,7 @@ INNERTUBE_CLIENTS = { 'clientVersion': '0.1', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 95 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 95, }, } @@ -465,10 +465,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi', 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw', 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 
'te', 'kn', 'ml', - 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko' + 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko', ] - _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'} + _IGNORED_WARNINGS = { + 'Unavailable videos will be hidden during playback', + 'Unavailable videos are hidden', + } _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}' @@ -698,7 +701,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg), 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg), 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), - 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client) + 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), } if session_index is None: session_index = self._extract_session_index(ytcfg) @@ -715,7 +718,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): url = { 'web': 'https://www.youtube.com', 'web_music': 'https://music.youtube.com', - 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1' + 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1', }.get(client) if not url: return {} @@ -726,7 +729,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): @staticmethod def _build_api_continuation_query(continuation, ctp=None): query = { - 'continuation': continuation + 'continuation': continuation, } # TODO: Inconsistency with clickTrackingParams. 
# Currently we have a fixed ctp contained within context (from ytcfg) @@ -766,7 +769,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return traverse_obj(renderer, ( ('contents', 'items', 'rows'), ..., 'continuationItemRenderer', - ('continuationEndpoint', ('button', 'buttonRenderer', 'command')) + ('continuationEndpoint', ('button', 'buttonRenderer', 'command')), ), get_all=False, expected_type=cls._extract_continuation_ep_data) @classmethod @@ -793,7 +796,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): for alert_type, alert_message in (warnings + errors[:-1]): self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once) if errors: - raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected) + raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected) def _extract_and_report_alerts(self, data, *args, **kwargs): return self._report_alerts(self._extract_alerts(data), *args, **kwargs) @@ -885,14 +888,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return count @staticmethod - def _extract_thumbnails(data, *path_list): + def _extract_thumbnails(data, *path_list, final_key='thumbnails'): """ Extract thumbnails from thumbnails dict @param path_list: path list to level that contains 'thumbnails' key """ thumbnails = [] for path in path_list or [()]: - for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...)): + for thumbnail in traverse_obj(data, (*variadic(path), final_key, ...)): thumbnail_url = url_or_none(thumbnail.get('url')) if not thumbnail_url: continue @@ -927,7 +930,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if start: return datetime_from_str(start) try: - return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit'))) + return datetime_from_str('now-{}{}'.format(mobj.group('time'), mobj.group('unit'))) except ValueError: return None @@ -1114,13 +1117,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): is_unlisted=self._has_badge(badges, 
BadgeType.AVAILABILITY_UNLISTED) or None), view_count_field: view_count, 'live_status': live_status, - 'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None + 'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None, } class YoutubeIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube' - _VALID_URL = r"""(?x)^ + _VALID_URL = r'''(?x)^ ( (?:https?://|//) # http(s):// or protocol-independent URL (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com| @@ -1129,7 +1132,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com| (?:www\.)?yourepeat\.com| tube\.majestyc\.net| - %(invidious)s| + {invidious}| youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: @@ -1145,16 +1148,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): youtu\.be| # just youtu.be/xxxx vid\.plus| # or vid.plus/xxxx zwearz\.com/watch| # or zwearz.com/watch/xxxx - %(invidious)s + {invidious} )/ |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= ) )? # all until now is optional -> you can pass the naked ID - (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID + (?P<id>[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID (?(1).+)? 
# if we found the ID, everything can follow - (?:\#|$)""" % { - 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), - } + (?:\#|$)'''.format( + invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), + ) _EMBED_REGEX = [ r'''(?x) (?: @@ -1326,7 +1329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@PhilippHagemeister', 'heatmap': 'count:100', 'timestamp': 1349198244, - } + }, }, { 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', @@ -1383,7 +1386,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'm4a', 'upload_date': '20121002', 'description': '', - 'title': 'UHDTV TEST 8K VIDEO.mp4' + 'title': 'UHDTV TEST 8K VIDEO.mp4', }, 'params': { 'youtube_include_dash_manifest': True, @@ -1591,7 +1594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'expected_warnings': [ 'DASH manifest missing', - ] + ], }, # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431) { @@ -1626,7 +1629,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'params': { 'skip_download': 'requires avconv', - } + }, }, # Non-square pixels { @@ -1850,7 +1853,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playable_in_embed': True, 'like_count': int, 'age_limit': 0, - 'channel_follower_count': int + 'channel_follower_count': int, }, 'params': { 'skip_download': True, @@ -2111,7 +2114,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA', 'tags': 'count:11', 'live_status': 'not_live', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'params': { 'skip_download': True, @@ -2288,7 +2291,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'timestamp': 1405513526, - } + }, }, { # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685 @@ -2323,11 +2326,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, { # Has multiple audio streams 'url': 'WaOKSUlf4TM', - 'only_matching': True + 'only_matching': True, }, { # Requires Premium: has format 141 
when requested using YTM url 'url': 'https://music.youtube.com/watch?v=XclachpHxis', - 'only_matching': True + 'only_matching': True, }, { # multiple subtitles with same lang_code 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug', @@ -2412,7 +2415,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_is_verified': True, 'heatmap': 'count:100', 'timestamp': 1395685455, - }, 'params': {'format': 'mhtml', 'skip_download': True} + }, 'params': {'format': 'mhtml', 'skip_download': True}, }, { # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939) 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4', @@ -2442,7 +2445,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@LeonNguyen', 'heatmap': 'count:100', 'timestamp': 1641170939, - } + }, }, { # date text is premiered video, ensure upload date in UTC (published 1641172509) 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM', @@ -2475,7 +2478,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_is_verified': True, 'heatmap': 'count:100', 'timestamp': 1641172509, - } + }, }, { # continuous livestream. 
# Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00 @@ -2535,7 +2538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Lesmiscore', 'uploader_url': 'https://www.youtube.com/@lesmiscore', 'timestamp': 1648005313, - } + }, }, { # Prefer primary title+description language metadata by default # Do not prefer translated description if primary is empty @@ -2564,7 +2567,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'cole-dlp-test-acc', 'timestamp': 1662677394, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { # Extractor argument: prefer translated title+description 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng', @@ -2765,7 +2768,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, ] @@ -2922,7 +2925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not should_continue: known_idx = idx - 1 raise ExtractorError('breaking out of outer loop') - last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx) + last_segment_url = urljoin(fragment_base_url, f'sq/{idx}') yield { 'url': last_segment_url, 'fragment_count': last_seq, @@ -2971,7 +2974,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if id_m: break else: - raise ExtractorError('Cannot identify player %r' % player_url) + raise ExtractorError(f'Cannot identify player {player_url!r}') return id_m.group('id') def _load_player(self, video_id, player_url, fatal=True): @@ -2980,7 +2983,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): code = self._download_webpage( player_url, video_id, fatal=fatal, note='Downloading player ' + player_id, - errnote='Download of %s failed' % player_url) + errnote=f'Download of {player_url} failed') if code: self._code_cache[player_id] = code return self._code_cache.get(player_id) @@ -3041,10 +3044,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): cache_res = func(test_string) cache_spec = [ord(c) for c in cache_res] expr_code = ' + 
'.join(gen_sig_code(cache_spec)) - signature_id_tuple = '(%s)' % ( - ', '.join(str(len(p)) for p in example_sig.split('.'))) - code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n' - ' return %s\n') % (signature_id_tuple, expr_code) + signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.'))) + code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n' + f' return {expr_code}\n') self.to_screen('Extracted signature function:\n' + code) def _parse_sig_js(self, jscode): @@ -3150,9 +3152,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # For redundancy func_code = self._search_regex( - r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s* + rf'''(?xs){func_name}\s*=\s*function\s*\((?P<var>[\w$]+)\)\s* # NB: The end of the regex is intentionally kept strict - {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name, + {{(?P<code>.+?}}\s*return\ [\w$]+.join\(""\))}};''', jscode, 'nsig function', group=('var', 'code'), default=None) if func_code: func_code = ([func_code[0]], func_code[1]) @@ -3218,7 +3220,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # cpn generation algorithm is reverse engineered from base.js. # In fact it works even with dummy cpn. 
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' - cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)) + cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)) # # more consistent results setting it to right before the end video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)] @@ -3255,7 +3257,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): webpage) if mobj: yield cls.url_result(mobj.group('url'), cls) - raise cls.StopExtraction() + raise cls.StopExtraction yield from super()._extract_from_webpage(url, webpage) @@ -3280,7 +3282,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): chapter_list = traverse_obj( data, ( 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', - 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters' + 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters', ), expected_type=list) return self._extract_chapters_helper( @@ -3334,7 +3336,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'author_is_uploader': ('author', 'isCreator', {bool}), 'author_is_verified': ('author', 'isVerified', {bool}), 'author_url': ('author', 'channelCommand', 'innertubeCommand', ( - ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url') + ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'), ), {lambda x: urljoin('https://www.youtube.com', x)}), }, get_all=False), 'is_favorited': (None if toolbar_entity_payload is None else @@ -3420,7 +3422,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sort_text = str_or_none(sort_menu_item.get('title')) if not sort_text: sort_text = 'top comments' if comment_sort_index == 0 else 'newest first' - self.to_screen('Sorting comments by %s' % sort_text.lower()) + self.to_screen(f'Sorting comments by {sort_text.lower()}') break return _continuation @@ -3491,15 +3493,15 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): # Keeps track of counts across recursive calls if not tracker: - tracker = dict( - running_total=0, - est_total=None, - current_page_thread=0, - total_parent_comments=0, - total_reply_comments=0, - seen_comment_ids=set(), - pinned_comment_ids=set() - ) + tracker = { + 'running_total': 0, + 'est_total': None, + 'current_page_thread': 0, + 'total_parent_comments': 0, + 'total_reply_comments': 0, + 'seen_comment_ids': set(), + 'pinned_comment_ids': set(), + } # TODO: Deprecated # YouTube comments have a max depth of 2 @@ -3510,8 +3512,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if max_depth == 1 and parent: return - max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map( - lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4) + max_comments, max_parents, max_replies, max_replies_per_thread, *_ = ( + int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4) continuation = self._extract_continuation(root_continuation_data) @@ -3540,7 +3542,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): note_prefix = ' Downloading comment API JSON reply thread %d %s' % ( tracker['current_page_thread'], comment_prog_str) else: - note_prefix = '%sDownloading comment%s API JSON page %d %s' % ( + note_prefix = '{}Downloading comment{} API JSON page {} {}'.format( ' ' if parent else '', ' replies' if parent else '', page_num, comment_prog_str) @@ -3627,9 +3629,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): context['signatureTimestamp'] = sts return { 'playbackContext': { - 'contentPlaybackContext': context + 'contentPlaybackContext': context, }, - **cls._get_checkok_params() + **cls._get_checkok_params(), } @staticmethod @@ -3669,7 +3671,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): item_id=video_id, ep='player', query=yt_query, ytcfg=player_ytcfg, headers=headers, fatal=True, default_client=client, - note='Downloading %s player API JSON' % 
client.replace('_', ' ').strip() + note='Downloading {} player API JSON'.format(client.replace('_', ' ').strip()), ) or None def _get_requested_clients(self, url, smuggled_data): @@ -3677,7 +3679,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): android_clients = [] default = ['ios', 'web'] allowed_clients = sorted( - (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'), + (client for client in INNERTUBE_CLIENTS if client[:1] != '_'), key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) for client in self._configuration_arg('player_client'): if client == 'default': @@ -3798,6 +3800,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration): CHUNK_SIZE = 10 << 20 + PREFERRED_LANG_VALUE = 10 + original_language = None itags, stream_ids = collections.defaultdict(set), [] itag_qualities, res_qualities = {}, {0: None} q = qualities([ @@ -3805,7 +3809,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # audio-only formats with unknown quality may get tagged as tiny 'tiny', 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats - 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres' + 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres', ]) streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...)) format_types = self._configuration_arg('formats') @@ -3818,8 +3822,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def build_fragments(f): return LazyList({ 'url': update_url_query(f['url'], { - 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}' - }) + 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}', + }), } for range_start in range(0, f['filesize'], CHUNK_SIZE)) for fmt in streaming_formats: @@ -3846,6 +3850,13 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): itag_qualities[itag] = quality if height: res_qualities[height] = quality + + is_default = audio_track.get('audioIsDefault') + is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower() + language_code = audio_track.get('id', '').split('.')[0] + if language_code and is_default: + original_language = language_code + # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment # (adding `&sq=0` to the URL) and parsing emsg box to determine the # number of fragment that would subsequently requested with (`&sq=N`) @@ -3860,9 +3871,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not all((sc, fmt_url, player_url, encrypted_sig)): continue try: - fmt_url += '&%s=%s' % ( + fmt_url += '&{}={}'.format( traverse_obj(sc, ('sp', -1)) or 'signature', - self._decrypt_signature(encrypted_sig, video_id, player_url) + self._decrypt_signature(encrypted_sig, video_id, player_url), ) except ExtractorError as e: self.report_warning('Signature extraction failed: Some formats may be missing', @@ -3871,12 +3882,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue query = parse_qs(fmt_url) - throttled = False if query.get('n'): try: decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0]) fmt_url = update_url_query(fmt_url, { - 'n': decrypt_nsig(query['n'][0], video_id, player_url) + 'n': decrypt_nsig(query['n'][0], video_id, player_url), }) except ExtractorError as e: phantomjs_hint = '' @@ -3885,20 +3895,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f'to workaround the issue. 
{PhantomJSwrapper.INSTALL_HINT}\n') if player_url: self.report_warning( - f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}' + f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}' f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True) self.write_debug(e, only_once=True) else: self.report_warning( - 'Cannot decrypt nsig without player_url: You may experience throttling for some formats', + 'Cannot decrypt nsig without player_url: Some formats may be missing', video_id=video_id, only_once=True) - throttled = True + continue tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) - language_preference = ( - 10 if audio_track.get('audioIsDefault') and 10 - else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10 - else -1) format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)})) # Some formats may have much smaller duration than others (possibly damaged during encoding) # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 @@ -3925,17 +3931,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'filesize': int_or_none(fmt.get('contentLength')), 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}', 'format_note': join_nonempty( - join_nonempty(audio_track.get('displayName'), - language_preference > 0 and ' (default)', delim=''), + join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''), name, fmt.get('isDrc') and 'DRC', try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), - throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN', + is_damaged and 'DAMAGED', is_broken and 'BROKEN', (self.get_param('verbose') or all_formats) and client_name, delim=', '), # Format 22 is likely to be damaged. 
See https://github.com/yt-dlp/yt-dlp/issues/3372 - 'source_preference': ((-10 if throttled else -5 if itag == '22' else -1) - + (100 if 'Premium' in name else 0)), + 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0), 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1 'audio_channels': fmt.get('audioChannels'), 'height': height, @@ -3945,9 +3949,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'filesize_approx': filesize_from_tbr(tbr, format_duration), 'url': fmt_url, 'width': int_or_none(fmt.get('width')), - 'language': join_nonempty(audio_track.get('id', '').split('.')[0], - 'desc' if language_preference < -1 else '') or None, - 'language_preference': language_preference, + 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None, + 'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1, # Strictly de-prioritize broken, damaged and 3gp formats 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None, } @@ -4008,6 +4011,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif itag: f['format_id'] = itag + if original_language and f.get('language') == original_language: + f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ') + f['language_preference'] = PREFERRED_LANG_VALUE + if f.get('source_preference') is None: f['source_preference'] = -1 @@ -4182,7 +4189,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): expected_type=str) if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'): if self.get_param('noplaylist'): - self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + self.to_screen(f'Downloading just video {video_id} because of --no-playlist') else: entries = [] feed_ids = [] @@ -4203,19 +4210,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): feed_title = feed_entry('title') title = video_title if feed_title: - title += ' (%s)' % feed_title + 
title += f' ({feed_title})' entries.append({ '_type': 'url_transparent', 'ie_key': 'Youtube', 'url': smuggle_url( - '%swatch?v=%s' % (base_url, feed_data['id'][0]), + '{}watch?v={}'.format(base_url, feed_data['id'][0]), {'force_singlefeed': True}), 'title': title, }) feed_ids.append(feed_id) self.to_screen( - 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' - % (', '.join(feed_ids), video_id)) + 'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format( + ', '.join(feed_ids), video_id)) return self.playlist_result( entries, video_id, video_title, video_description) @@ -4279,7 +4286,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants # in resolution, these are not the custom thumbnail. So de-prioritize them 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', - 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3' + 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3', ] n_thumbnail_names = len(thumbnail_names) thumbnails.extend({ @@ -4352,8 +4359,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'), 'live_status': live_status, 'release_timestamp': live_start_time, - '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats - 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto') + '_format_sort_fields': ( # source_preference is lower for potentially damaged formats + 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'), } subtitles = {} @@ -4431,7 +4438,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]: d_k += '_time' if d_k not in info and k in s_ks: - info[d_k] = parse_duration(query[k][0]) + 
info[d_k] = parse_duration(v[0]) # Youtube Music Auto-generated description if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'): @@ -4483,10 +4490,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): info['comment_count'] = traverse_obj(initial_data, ( 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer', - 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount' + 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', ), ( 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section', - 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo' + 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', ), expected_type=self._get_count, get_all=False) try: # This will error if there is no livechat @@ -4716,7 +4723,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): def _extract_basic_item_renderer(item): # Modified from _extract_grid_item_renderer known_basic_renderers = ( - 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer' + 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer', ) for key, renderer in item.items(): if not isinstance(renderer, dict): @@ -4777,7 +4784,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): playlist_id = renderer.get('playlistId') if playlist_id: yield self.url_result( - 'https://www.youtube.com/playlist?list=%s' % playlist_id, + f'https://www.youtube.com/playlist?list={playlist_id}', ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title) continue @@ -4835,7 +4842,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): yield from self._grid_entries(renderer) renderer = content.get('horizontalListRenderer') if renderer: - # TODO + # TODO: handle case pass def 
_shelf_entries(self, shelf_renderer, skip_channels=False): @@ -4912,7 +4919,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str) if playlist_id: yield self.url_result( - 'https://www.youtube.com/playlist?list=%s' % playlist_id, + f'https://www.youtube.com/playlist?list={playlist_id}', ie=YoutubeTabIE.ie_key(), video_id=playlist_id) # inline video links runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or [] @@ -5065,12 +5072,12 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): continuation_items = traverse_obj(response, ( ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ..., - 'appendContinuationItemsAction', 'continuationItems' + 'appendContinuationItemsAction', 'continuationItems', ), 'continuationContents', get_all=False) continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={}) video_items_renderer = None - for key in continuation_item.keys(): + for key in continuation_item: if key not in known_renderers: continue func, parent_key = known_renderers[key] @@ -5125,6 +5132,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): else: metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict) + # pageHeaderViewModel slow rollout began April 2024 + page_header_view_model = traverse_obj(data, ( + 'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict})) + # We can get the uncropped banner/avatar by replacing the crop params with '=s0' # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714 def _get_uncropped(url): @@ -5137,11 +5148,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): avatar_thumbnails.append({ 'url': uncropped_avatar, 'id': 'avatar_uncropped', - 'preference': 1 + 'preference': 1, }) - channel_banners = self._extract_thumbnails( - data, ('header', ..., ('banner', 
'mobileBanner', 'tvBanner'))) + channel_banners = ( + self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner'))) + or self._extract_thumbnails( + page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources')) for banner in channel_banners: banner['preference'] = -10 @@ -5151,7 +5164,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): channel_banners.append({ 'url': uncropped_banner, 'id': 'banner_uncropped', - 'preference': -5 + 'preference': -5, }) # Deprecated - remove primary_sidebar_renderer when layout discontinued @@ -5168,7 +5181,11 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or info['id']), 'availability': self._extract_availability(data), - 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')), + 'channel_follower_count': ( + self._get_count(data, ('header', ..., 'subscriberCountText')) + or traverse_obj(page_header_view_model, ( + 'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts', + lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))), 'description': try_get(metadata_renderer, lambda x: x.get('description', '')), 'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str})) or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))), @@ -5221,7 +5238,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): info.update({ 'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text), 'channel_id': self.ucid_or_none(browse_ep.get('browseId')), - 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))) + 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))), }) info.update({ @@ -5253,12 
+5270,12 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): 'playlistId': playlist_id, 'videoId': watch_endpoint.get('videoId') or last_id, 'index': watch_endpoint.get('index') or len(videos), - 'params': watch_endpoint.get('params') or 'OAE%3D' + 'params': watch_endpoint.get('params') or 'OAE%3D', } response = self._extract_response( - item_id='%s page %d' % (playlist_id, page_num), + item_id=f'{playlist_id} page {page_num}', query=query, ep='next', headers=headers, ytcfg=ytcfg, - check_get_keys='contents' + check_get_keys='contents', ) playlist = try_get( response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) @@ -5349,7 +5366,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): visitor_data=self._extract_visitor_data(data, ytcfg)) query = { 'params': 'wgYCCAA=', - 'browseId': f'VL{item_id}' + 'browseId': f'VL{item_id}', } return self._extract_response( item_id=item_id, headers=headers, query=query, @@ -5481,7 +5498,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): (?!consent\.)(?:\w+\.)? 
(?: youtube(?:kids)?\.com| - %(invidious)s + {invidious} )/ (?: (?P<channel_type>channel|c|user|browse)/| @@ -5489,13 +5506,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): feed/|hashtag/| (?:playlist|watch)\?.*?\blist= )| - (?!(?:%(reserved_names)s)\b) # Direct URLs + (?!(?:{reserved_names})\b) # Direct URLs ) (?P<id>[^/?\#&]+) - )''' % { - 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES, - 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), - } + )'''.format( + reserved_names=YoutubeBaseInfoExtractor._RESERVED_NAMES, + invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), + ) IE_NAME = 'youtube:tab' _TESTS = [{ @@ -5513,7 +5530,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'], 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', - 'channel_follower_count': int + 'channel_follower_count': int, }, }, { 'note': 'playlists, multipage, different order', @@ -5530,7 +5547,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel': 'Igor Kleiner Ph.D.', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', - 'channel_follower_count': int + 'channel_follower_count': int, }, }, { 'note': 'playlists, series', @@ -5565,8 +5582,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ', 'tags': 'count:12', 'channel': 'ThirstForScience', - 'channel_follower_count': int - } + 'channel_follower_count': int, + }, }, { 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 'only_matching': True, @@ -5621,7 +5638,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@lexwill718', 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 
'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 2, }, { @@ -5638,7 +5655,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'uploader_url': 'https://www.youtube.com/@lexwill718', 'channel': 'lex will', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 975, }, { @@ -5655,7 +5672,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel': 'lex will', 'tags': ['bible', 'history', 'prophesy'], 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 199, }, { @@ -5672,7 +5689,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'tags': ['bible', 'history', 'prophesy'], - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 17, }, { @@ -5995,11 +6012,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): }, { 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist', 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', - 'only_matching': True + 'only_matching': True, }, { 'note': '/browse/ should redirect to /channel/', 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', - 'only_matching': True + 'only_matching': True, }, { 'note': 'VLPL, should redirect to playlist?list=PL...', 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', @@ -6096,7 +6113,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_id': '@PhilippHagemeister', 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader': 'Philipp Hagemeister', - } + }, }], 'playlist_count': 1, 'params': {'extract_flat': True}, @@ -6111,7 +6128,7 @@ class 
YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'playlist_mincount': 50, 'params': { 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + 'extractor_args': {'youtubetab': {'skip': ['webpage']}}, }, }, { 'note': 'API Fallback: /videos tab, sorted by oldest first', @@ -6124,12 +6141,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', 'tags': [], 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 650, 'params': { 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + 'extractor_args': {'youtubetab': {'skip': ['webpage']}}, }, 'skip': 'Query for sorting no longer works', }, { @@ -6151,13 +6168,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'playlist_mincount': 101, 'params': { 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + 'extractor_args': {'youtubetab': {'skip': ['webpage']}}, }, 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'], }, { 'note': 'non-standard redirect to regional channel', 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', - 'only_matching': True + 'only_matching': True, }, { 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")', 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', @@ -6176,7 +6193,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_id': '@pukkandan', 'uploader': 'pukkandan', }, - 'playlist_mincount': 2 + 'playlist_mincount': 2, }, { 'note': 'translated tab name', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists', @@ -6317,7 +6334,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): # No uploads and no UCID given. 
Should fail with no uploads error # See test_youtube_lists 'url': 'https://www.youtube.com/news', - 'only_matching': True + 'only_matching': True, }, { # No videos tab but has a shorts tab 'url': 'https://www.youtube.com/c/TKFShorts', @@ -6379,7 +6396,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'title': 'SHORT short', 'view_count': int, 'thumbnails': list, - } + }, }], 'params': {'extract_flat': True}, }, { @@ -6387,8 +6404,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live', 'info_dict': { 'id': 'UCQvWX73GQygcwXOTSf_VDVg', - 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live - 'tags': [] + 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live + 'tags': [], }, 'playlist': [{ 'info_dict': { @@ -6406,10 +6423,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': str, 'uploader_id': str, 'channel_is_verified': bool, # this will keep changing - } + }, }], 'params': {'extract_flat': True, 'playlist_items': '1'}, - 'playlist_mincount': 1 + 'playlist_mincount': 1, }, { # Channel renderer metadata. 
Contains number of videos on the channel 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels', @@ -6442,7 +6459,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PewDiePie', 'uploader_id': '@PewDiePie', 'channel_is_verified': True, - } + }, }], 'params': {'extract_flat': True}, }, { @@ -6614,7 +6631,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): # Handle both video/playlist URLs qs = parse_qs(url) - video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')] + video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list')) if not video_id and mobj['not_channel'].startswith('watch'): if not playlist_id: # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable @@ -6746,15 +6763,15 @@ class YoutubePlaylistIE(InfoExtractor): (?: (?: youtube(?:kids)?\.com| - %(invidious)s + {invidious} ) /.*?\?.*?\blist= )? - (?P<id>%(playlist_id)s) - )''' % { - 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE, - 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), - } + (?P<id>{playlist_id}) + )'''.format( + playlist_id=YoutubeBaseInfoExtractor._PLAYLIST_ID_RE, + invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), + ) IE_NAME = 'youtube:playlist' _TESTS = [{ 'note': 'issue #673', @@ -6854,7 +6871,7 @@ class YoutubePlaylistIE(InfoExtractor): class YoutubeYtBeIE(InfoExtractor): IE_DESC = 'youtu.be' - _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} + _VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})' _TESTS = [{ 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5', 'info_dict': { @@ -6880,7 +6897,7 @@ class YoutubeYtBeIE(InfoExtractor): 'availability': 'public', 'duration': 59, 
'comment_count': int, - 'channel_follower_count': int + 'channel_follower_count': int, }, 'params': { 'noplaylist': True, @@ -7054,7 +7071,7 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): 'info_dict': { 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', - } + }, }, { 'note': 'Suicide/self-harm search warning', 'url': 'ytsearch1:i hate myself and i wanna die', @@ -7062,7 +7079,7 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): 'info_dict': { 'id': 'i hate myself and i wanna die', 'title': 'i hate myself and i wanna die', - } + }, }] @@ -7077,7 +7094,7 @@ class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): 'info_dict': { 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', - } + }, }] @@ -7091,14 +7108,14 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', - } + }, }, { 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D', 'playlist_mincount': 5, 'info_dict': { 'id': 'python', 'title': 'python', - } + }, }, { 'url': 'https://www.youtube.com/results?search_query=%23cats', 'playlist_mincount': 1, @@ -7137,7 +7154,7 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): 'uploader': 'Kurzgesagt – In a Nutshell', 'channel_is_verified': True, 'channel_follower_count': int, - } + }, }], 'params': {'extract_flat': True, 'playlist_items': '1'}, 'playlist_mincount': 1, @@ -7162,7 +7179,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'royalty free music', 'title': 'royalty free music', - } + }, }, { 'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D', 'playlist_mincount': 30, @@ -7170,7 +7187,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): 'id': 'royalty free music - songs', 'title': 'royalty free music - songs', }, - 'params': {'extract_flat': 
'in_playlist'} + 'params': {'extract_flat': 'in_playlist'}, }, { 'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists', 'playlist_mincount': 30, @@ -7178,7 +7195,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): 'id': 'royalty free music - community playlists', 'title': 'royalty free music - community playlists', }, - 'params': {'extract_flat': 'in_playlist'} + 'params': {'extract_flat': 'in_playlist'}, }] _SECTIONS = { @@ -7197,7 +7214,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): if params: section = next((k for k, v in self._SECTIONS.items() if v == params), params) else: - section = urllib.parse.unquote_plus((url.split('#') + [''])[1]).lower() + section = urllib.parse.unquote_plus(([*url.split('#'), ''])[1]).lower() params = self._SECTIONS.get(section) if not params: section = None @@ -7217,8 +7234,8 @@ class YoutubeFeedsInfoExtractor(InfoExtractor): YoutubeBaseInfoExtractor._check_login_required(self) @classproperty - def IE_NAME(self): - return f'youtube:{self._FEED_NAME}' + def IE_NAME(cls): + return f'youtube:{cls._FEED_NAME}' def _real_extract(self, url): return self.url_result( @@ -7386,7 +7403,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor): 'chapters': 'count:20', 'comment_count': int, 'heatmap': 'count:100', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/zaiko.py b/yt_dlp/extractor/zaiko.py index 2b6221d..c8c4ec0 100644 --- a/yt_dlp/extractor/zaiko.py +++ b/yt_dlp/extractor/zaiko.py @@ -106,7 +106,7 @@ class ZaikoIE(ZaikoBaseIE): **traverse_obj(player_meta, ('initial_event_info', { 'alt_title': ('title', {str}), })), - 'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)] + 'thumbnails': [{'url': url, 'id': url_basename(url)} for url in thumbnail_urls if url_or_none(url)], } diff --git a/yt_dlp/extractor/zapiks.py b/yt_dlp/extractor/zapiks.py index 2a12aa5..2916367 100644 --- a/yt_dlp/extractor/zapiks.py +++ 
b/yt_dlp/extractor/zapiks.py @@ -55,11 +55,11 @@ class ZapiksIE(InfoExtractor): r'data-media-id="(\d+)"', webpage, 'video id') playlist = self._download_xml( - 'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id, + f'http://www.zapiks.fr/view/index.php?action=playlist&media_id={video_id}&lang=en', display_id) NS_MAP = { - 'jwplayer': 'http://rss.jwpcdn.com/' + 'jwplayer': 'http://rss.jwpcdn.com/', } def ns(path): diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py index 5cc9c5f..161804b 100644 --- a/yt_dlp/extractor/zattoo.py +++ b/yt_dlp/extractor/zattoo.py @@ -2,7 +2,6 @@ import re import uuid from .common import InfoExtractor -from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -27,13 +26,13 @@ class ZattooPlatformBaseIE(InfoExtractor): def _perform_login(self, username, password): try: data = self._download_json( - '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in', + f'{self._host_url()}/zapi/v2/account/login', None, 'Logging in', data=urlencode_postdata({ 'login': username, 'password': password, 'remember': 'true', }), headers={ - 'Referer': '%s/login' % self._host_url(), + 'Referer': f'{self._host_url()}/login', 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', }) except ExtractorError as e: @@ -51,9 +50,9 @@ class ZattooPlatformBaseIE(InfoExtractor): # Will setup appropriate cookies self._request_webpage( - '%s/zapi/v3/session/hello' % self._host_url(), None, + f'{self._host_url()}/zapi/v3/session/hello', None, 'Opening session', data=urlencode_postdata({ - 'uuid': compat_str(uuid.uuid4()), + 'uuid': str(uuid.uuid4()), 'lang': 'en', 'app_version': '1.8.2', 'format': 'json', @@ -72,8 +71,7 @@ class ZattooPlatformBaseIE(InfoExtractor): def _extract_cid(self, video_id, channel_name): channel_groups = self._download_json( - '%s/zapi/v2/cached/channels/%s' % (self._host_url(), - self._power_guide_hash), + 
f'{self._host_url()}/zapi/v2/cached/channels/{self._power_guide_hash}', video_id, 'Downloading channel list', query={'details': False})['channel_groups'] channel_list = [] @@ -90,8 +88,7 @@ class ZattooPlatformBaseIE(InfoExtractor): def _extract_cid_and_video_info(self, video_id): data = self._download_json( - '%s/zapi/v2/cached/program/power_details/%s' % ( - self._host_url(), self._power_guide_hash), + f'{self._host_url()}/zapi/v2/cached/program/power_details/{self._power_guide_hash}', video_id, 'Downloading video information', query={ @@ -113,7 +110,7 @@ class ZattooPlatformBaseIE(InfoExtractor): 'season_number': int_or_none(p.get('s_no')), 'release_year': int_or_none(p.get('year')), 'categories': try_get(p, lambda x: x['c'], list), - 'tags': try_get(p, lambda x: x['g'], list) + 'tags': try_get(p, lambda x: x['g'], list), } return cid, info_dict @@ -123,7 +120,7 @@ class ZattooPlatformBaseIE(InfoExtractor): @returns (ondemand_token, ondemand_type, info_dict) """ data = self._download_json( - '%s/zapi/vod/movies/%s' % (self._host_url(), ondemand_id), + f'{self._host_url()}/zapi/vod/movies/{ondemand_id}', ondemand_id, 'Downloading ondemand information') info_dict = { 'id': ondemand_id, @@ -144,18 +141,18 @@ class ZattooPlatformBaseIE(InfoExtractor): if is_live: postdata_common.update({'timeshift': 10800}) - url = '%s/zapi/watch/live/%s' % (self._host_url(), cid) + url = f'{self._host_url()}/zapi/watch/live/{cid}' elif record_id: - url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id) + url = f'{self._host_url()}/zapi/watch/recording/{record_id}' elif ondemand_id: postdata_common.update({ 'teasable_id': ondemand_id, 'term_token': ondemand_termtoken, - 'teasable_type': ondemand_type + 'teasable_type': ondemand_type, }) - url = '%s/zapi/watch/vod/video' % self._host_url() + url = f'{self._host_url()}/zapi/watch/vod/video' else: - url = '%s/zapi/v3/watch/replay/%s/%s' % (self._host_url(), cid, video_id) + url = 
f'{self._host_url()}/zapi/v3/watch/replay/{cid}/{video_id}' formats = [] subtitles = {} for stream_type in ('dash', 'hls7'): @@ -163,7 +160,7 @@ class ZattooPlatformBaseIE(InfoExtractor): postdata['stream_type'] = stream_type data = self._download_json( - url, video_id, 'Downloading %s formats' % stream_type.upper(), + url, video_id, f'Downloading {stream_type.upper()} formats', data=urlencode_postdata(postdata), fatal=False) if not data: continue @@ -218,7 +215,7 @@ class ZattooPlatformBaseIE(InfoExtractor): 'title': channel_name, 'is_live': True, 'formats': formats, - 'subtitles': subtitles + 'subtitles': subtitles, } def _extract_record(self, record_id): @@ -267,9 +264,9 @@ class ZattooIE(ZattooBaseIE): 'release_year': 2022, 'episode': 'Folge 1655', 'categories': 'count:1', - 'tags': 'count:2' + 'tags': 'count:2', }, - 'params': {'skip_download': 'm3u8'} + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://zattoo.com/program/daserste/210177916', 'only_matching': True, @@ -322,7 +319,7 @@ class ZattooRecordingsIE(ZattooBaseIE): class NetPlusTVBaseIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'netplus' _HOST = 'netplus.tv' - _API_HOST = 'www.%s' % _HOST + _API_HOST = f'www.{_HOST}' class NetPlusTVIE(NetPlusTVBaseIE): @@ -458,7 +455,7 @@ class WalyTVRecordingsIE(WalyTVBaseIE): class BBVTVBaseIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'bbvtv' _HOST = 'bbv-tv.net' - _API_HOST = 'www.%s' % _HOST + _API_HOST = f'www.{_HOST}' class BBVTVIE(BBVTVBaseIE): @@ -504,7 +501,7 @@ class BBVTVRecordingsIE(BBVTVBaseIE): class VTXTVBaseIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'vtxtv' _HOST = 'vtxtv.ch' - _API_HOST = 'www.%s' % _HOST + _API_HOST = f'www.{_HOST}' class VTXTVIE(VTXTVBaseIE): @@ -595,7 +592,7 @@ class GlattvisionTVRecordingsIE(GlattvisionTVBaseIE): class SAKTVBaseIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'saktv' _HOST = 'saktv.ch' - _API_HOST = 'www.%s' % _HOST + _API_HOST = f'www.{_HOST}' class SAKTVIE(SAKTVBaseIE): @@ -686,7 +683,7 @@ class 
EWETVRecordingsIE(EWETVBaseIE): class QuantumTVBaseIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'quantumtv' _HOST = 'quantum-tv.com' - _API_HOST = 'www.%s' % _HOST + _API_HOST = f'www.{_HOST}' class QuantumTVIE(QuantumTVBaseIE): @@ -777,7 +774,7 @@ class OsnatelTVRecordingsIE(OsnatelTVBaseIE): class EinsUndEinsTVBaseIE(ZattooPlatformBaseIE): _NETRC_MACHINE = '1und1tv' _HOST = '1und1.tv' - _API_HOST = 'www.%s' % _HOST + _API_HOST = f'www.{_HOST}' class EinsUndEinsTVIE(EinsUndEinsTVBaseIE): diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index c04d51b..a862e25 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( NO_DEFAULT, ExtractorError, @@ -29,11 +28,11 @@ class ZDFBaseIE(InfoExtractor): def _call_api(self, url, video_id, item, api_token=None, referrer=None): headers = {} if api_token: - headers['Api-Auth'] = 'Bearer %s' % api_token + headers['Api-Auth'] = f'Bearer {api_token}' if referrer: headers['Referer'] = referrer return self._download_json( - url, video_id, 'Downloading JSON %s' % item, headers=headers) + url, video_id, f'Downloading JSON {item}', headers=headers) @staticmethod def _extract_subtitles(src): @@ -73,7 +72,7 @@ class ZDFBaseIE(InfoExtractor): f.update({ 'url': format_url, 'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')), - 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)) + 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)), }) new_formats = [f] formats.extend(merge_dicts(f, { @@ -236,7 +235,7 @@ class ZDFIE(ZDFBaseIE): 'timestamp': 1641355200, 'upload_date': '20220105', }, - 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"' + 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"', }, { 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html', 
'info_dict': { @@ -270,7 +269,7 @@ class ZDFIE(ZDFBaseIE): t = content['mainVideoContent']['http://zdf.de/rels/target'] ptmd_path = traverse_obj(t, ( (('streams', 'default'), None), - ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template') + ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'), ), get_all=False) if not ptmd_path: raise ExtractorError('Could not extract ptmd_path') @@ -303,7 +302,7 @@ class ZDFIE(ZDFBaseIE): chapters = [{ 'start_time': chap.get('anchorOffset'), 'end_time': next_chap.get('anchorOffset'), - 'title': chap.get('anchorLabel') + 'title': chap.get('anchorLabel'), } for chap, next_chap in zip(chapter_marks, chapter_marks[1:])] return merge_dicts(info, { @@ -312,7 +311,7 @@ class ZDFIE(ZDFBaseIE): 'duration': int_or_none(t.get('duration')), 'timestamp': unified_timestamp(content.get('editorialDate')), 'thumbnails': thumbnails, - 'chapters': chapters or None + 'chapters': chapters or None, }) def _extract_regular(self, url, player, video_id): @@ -322,7 +321,7 @@ class ZDFIE(ZDFBaseIE): def _extract_mobile(self, video_id): video = self._download_json( - 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id, + f'https://zdf-cdn.live.cellular.de/mediathekV2/document/{video_id}', video_id) formats = [] @@ -341,7 +340,7 @@ class ZDFIE(ZDFBaseIE): if isinstance(teaser_bild, dict): for thumbnail_key, thumbnail in teaser_bild.items(): thumbnail_url = try_get( - thumbnail, lambda x: x['url'], compat_str) + thumbnail, lambda x: x['url'], str) if thumbnail_url: thumbnails.append({ 'url': thumbnail_url, @@ -356,7 +355,7 @@ class ZDFIE(ZDFBaseIE): 'description': document.get('beschreibung'), 'duration': int_or_none(document.get('length')), 'timestamp': unified_timestamp(document.get('date')) or unified_timestamp( - try_get(video, lambda x: x['meta']['editorialDate'], compat_str)), + try_get(video, lambda x: x['meta']['editorialDate'], str)), 'thumbnails': thumbnails, 'subtitles': 
self._extract_subtitles(document), 'formats': formats, @@ -405,10 +404,10 @@ class ZDFChannelIE(ZDFBaseIE): @classmethod def suitable(cls, url): - return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url) + return False if ZDFIE.suitable(url) else super().suitable(url) def _og_search_title(self, webpage, fatal=False): - title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal) + title = super()._og_search_title(webpage, fatal=fatal) return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None def _real_extract(self, url): @@ -417,7 +416,7 @@ class ZDFChannelIE(ZDFBaseIE): webpage = self._download_webpage(url, channel_id) matches = re.finditer( - r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL, + rf'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>{ZDFIE._VALID_URL})\1''', webpage) if self._downloader.params.get('noplaylist', False): @@ -428,11 +427,11 @@ class ZDFChannelIE(ZDFBaseIE): if entry: return entry else: - self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, )) + self.to_screen(f'Downloading playlist {channel_id} - add --no-playlist to download just the main video') def check_video(m): v_ref = self._search_regex( - r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ), + r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["']){}\2[^>]*>)'''.format(m.group('p_id')), webpage, 'check id', default='') v_ref = extract_attributes(v_ref) return v_ref.get('data-target-video-type') != 'novideo' diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py index ca79cf0..fb523de 100644 --- a/yt_dlp/extractor/zee5.py +++ b/yt_dlp/extractor/zee5.py @@ -3,7 +3,6 @@ import time import uuid from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, 
int_or_none, @@ -38,7 +37,7 @@ class Zee5IE(InfoExtractor): 'display_id': 'adavari-matalaku-ardhale-verule', 'title': 'Adavari Matalaku Ardhale Verule', 'duration': 9360, - 'description': compat_str, + 'description': str, 'alt_title': 'Adavari Matalaku Ardhale Verule', 'uploader': 'Zee Entertainment Enterprises Ltd', 'release_date': '20070427', @@ -47,7 +46,7 @@ class Zee5IE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'episode_number': 0, 'episode': 'Episode 0', - 'tags': list + 'tags': list, }, 'params': { 'format': 'bv', @@ -60,7 +59,7 @@ class Zee5IE(InfoExtractor): 'display_id': 'yoga-se-hoga-bandbudh-aur-budbak', 'title': 'Yoga Se Hoga-Bandbudh aur Budbak', 'duration': 659, - 'description': compat_str, + 'description': str, 'alt_title': 'Yoga Se Hoga-Bandbudh aur Budbak', 'uploader': 'Zee Entertainment Enterprises Ltd', 'release_date': '20150101', @@ -79,22 +78,22 @@ class Zee5IE(InfoExtractor): }, }, { 'url': 'https://www.zee5.com/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.zee5.com/global/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.zee5.com/kids/kids-movies/maya-bommalu/0-0-movie_1040370005', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.zee5.com/news/details/jana-sena-chief-pawan-kalyan-shows-slippers-to-ysrcp-leaders/0-0-newsauto_6ettj4242oo0', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://www.zee5.com/music-videos/details/adhento-gaani-vunnapaatuga-jersey-nani-shraddha-srinath/0-0-56973', - 'only_matching': True + 'only_matching': True, }] _DEVICE_ID = str(uuid.uuid4()) 
_USER_TOKEN = None @@ -136,10 +135,10 @@ class Zee5IE(InfoExtractor): 'https://launchapi.zee5.com/launch?platform_name=web_app', video_id, note='Downloading access token')['platform_token'] data = { - 'x-access-token': access_token_request['token'] + 'x-access-token': access_token_request['token'], } if self._USER_TOKEN: - data['Authorization'] = 'bearer %s' % self._USER_TOKEN + data['Authorization'] = f'bearer {self._USER_TOKEN}' else: data['X-Z5-Guest-Token'] = self._DEVICE_ID @@ -150,7 +149,7 @@ class Zee5IE(InfoExtractor): 'platform_name': 'desktop_web', 'country': self._USER_COUNTRY or self.get_param('geo_bypass_country') or 'IN', 'check_parental_control': False, - }, headers={'content-type': 'application/json'}, data=json.dumps(data).encode('utf-8')) + }, headers={'content-type': 'application/json'}, data=json.dumps(data).encode()) asset_data = json_data['assetDetails'] show_data = json_data.get('showDetails', {}) if 'premium' in asset_data['business_type']: @@ -186,7 +185,7 @@ class Zee5IE(InfoExtractor): 'season': try_get(show_data, lambda x: x['seasons']['title'], str), 'season_number': int_or_none(try_get(show_data, lambda x: x['seasons'][0]['orderid'])), 'episode_number': int_or_none(try_get(asset_data, lambda x: x['orderid'])), - 'tags': try_get(asset_data, lambda x: x['tags'], list) + 'tags': try_get(asset_data, lambda x: x['tags'], list), } @@ -251,17 +250,17 @@ class Zee5SeriesIE(InfoExtractor): page_num = 0 show_json = self._download_json(show_url, video_id=show_id, headers=headers) for season in show_json.get('seasons') or []: - season_id = try_get(season, lambda x: x['id'], compat_str) + season_id = try_get(season, lambda x: x['id'], str) next_url = f'https://gwapi.zee5.com/content/tvshow/?season_id={season_id}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100' while next_url: page_num += 1 episodes_json = self._download_json( next_url, video_id=show_id, headers=headers, - note='Downloading JSON metadata 
page %d' % page_num) + note=f'Downloading JSON metadata page {page_num}') for episode in try_get(episodes_json, lambda x: x['episode'], list) or []: video_id = episode.get('id') yield self.url_result( - 'zee5:%s' % video_id, + f'zee5:{video_id}', ie=Zee5IE.ie_key(), video_id=video_id) next_url = url_or_none(episodes_json.get('next_episode_api')) diff --git a/yt_dlp/extractor/zeenews.py b/yt_dlp/extractor/zeenews.py index e2cb1e7..cbe0031 100644 --- a/yt_dlp/extractor/zeenews.py +++ b/yt_dlp/extractor/zeenews.py @@ -20,7 +20,7 @@ class ZeeNewsIE(InfoExtractor): 'view_count': int, 'duration': 97, 'description': 'ग्रेटर नोएडा जारचा थाना क्षेत्र के प्याबली में दिनदहाड़े दुकान में घुसकर अज्ञात हमलावरों ने हमला कर', - } + }, }, { 'url': 'https://zeenews.india.com/hindi/india/video/videsh-superfast-queen-elizabeth-iis-funeral-today/1357710', @@ -35,8 +35,8 @@ class ZeeNewsIE(InfoExtractor): 'view_count': int, 'duration': 133, 'description': 'सेगमेंट विदेश सुपराफास्ट में देखिए देश और दुनिया की सभी बड़ी खबरें, वो भी हर खबर फटाफट अंदाज में.', - } - } + }, + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/zenporn.py b/yt_dlp/extractor/zenporn.py index 8faa0e3..084c1fc 100644 --- a/yt_dlp/extractor/zenporn.py +++ b/yt_dlp/extractor/zenporn.py @@ -21,7 +21,7 @@ class ZenPornIE(InfoExtractor): 'upload_date': '20230925', 'uploader': 'md5:9fae59847f1f58d1da8f2772016c12f3', 'age_limit': 18, - } + }, }, { 'url': 'https://zenporn.com/video/15570701', 'md5': 'acba0d080d692664fcc8c4e5502b1a67', @@ -35,7 +35,7 @@ class ZenPornIE(InfoExtractor): 'upload_date': '20230921', 'uploader': 'Lois Clarke', 'age_limit': 18, - } + }, }, { 'url': 'https://zenporn.com/video/8531117/amateur-students-having-a-fuck-fest-at-club/', 'md5': '67411256aa9451449e4d29f3be525541', @@ -49,7 +49,7 @@ class ZenPornIE(InfoExtractor): 'upload_date': '20191005', 'uploader': 'Jackopenass', 'age_limit': 18, - } + }, }, { 'url': 'https://zenporn.com/video/15872038/glad-you-came/', 'md5': 
'296ccab437f5bac6099433768449d8e1', @@ -63,7 +63,7 @@ class ZenPornIE(InfoExtractor): 'upload_date': '20231024', 'uploader': 'Martin Rudenko', 'age_limit': 18, - } + }, }] def _gen_info_url(self, ext_domain, extr_id, lifetime=86400): diff --git a/yt_dlp/extractor/zetland.py b/yt_dlp/extractor/zetland.py index 055a643..2b9df00 100644 --- a/yt_dlp/extractor/zetland.py +++ b/yt_dlp/extractor/zetland.py @@ -23,7 +23,7 @@ class ZetlandDKArticleIE(InfoExtractor): 'description': 'md5:9619d426772c133f5abb26db27f26a01', 'timestamp': 1705377592, 'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14', - } + }, }] @@ -44,7 +44,7 @@ class ZetlandDKArticleIE(InfoExtractor): return merge_dicts({ 'id': display_id, 'formats': formats, - 'uploader_id': uploader_id + 'uploader_id': uploader_id, }, traverse_obj(story_data, { 'title': ((('story_content', 'content', 'title'), 'title'), {str}), 'uploader': ('sharer', 'name'), diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py index 18b22a5..c1a5bf6 100644 --- a/yt_dlp/extractor/zhihu.py +++ b/yt_dlp/extractor/zhihu.py @@ -20,7 +20,7 @@ class ZhihuIE(InfoExtractor): 'view_count': int, 'like_count': int, 'comment_count': int, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index 909a7a3..1685edb 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -112,7 +112,7 @@ class ZingMp3IE(ZingMp3BaseIE): 'subtitles': { 'origin': [{ 'ext': 'lrc', - }] + }], }, 'duration': 255, 'track': 'Xa Mãi Xa', @@ -383,7 +383,7 @@ class ZingMp3ChartMusicVideoIE(ZingMp3BaseIE): 'id': song_id, 'type': 'genre', 'page': page, - 'count': self._PER_PAGE + 'count': self._PER_PAGE, }) def _real_extract(self, url): @@ -446,7 +446,7 @@ class ZingMp3UserIE(ZingMp3BaseIE): 'id': user_id, 'type': 'artist', 'page': page, - 'count': self._PER_PAGE + 'count': self._PER_PAGE, }) def _real_extract(self, url): @@ -569,14 +569,14 @@ class ZingMp3PodcastEpisodeIE(ZingMp3BaseIE): 
'info_dict': { 'id': '68Z9W66B', 'title': 'Nhạc Mới Mỗi Ngày', - 'description': 'md5:2875dfa951f8e5356742f1610cf20691' + 'description': 'md5:2875dfa951f8e5356742f1610cf20691', }, 'playlist_mincount': 20, }, { 'url': 'https://zingmp3.vn/cgr/Am-nhac/IWZ980AO.html', 'info_dict': { 'id': 'IWZ980AO', - 'title': 'Âm nhạc' + 'title': 'Âm nhạc', }, 'playlist_mincount': 2, }] @@ -585,7 +585,7 @@ class ZingMp3PodcastEpisodeIE(ZingMp3BaseIE): return self._call_api(url_type, { 'id': eps_id, 'page': page, - 'count': self._PER_PAGE + 'count': self._PER_PAGE, }) def _real_extract(self, url): diff --git a/yt_dlp/extractor/zoom.py b/yt_dlp/extractor/zoom.py index e2bf817..fe2db84 100644 --- a/yt_dlp/extractor/zoom.py +++ b/yt_dlp/extractor/zoom.py @@ -109,9 +109,9 @@ class ZoomIE(InfoExtractor): subtitles = {} for _type in ('transcript', 'cc', 'chapter'): - if data.get('%sUrl' % _type): + if data.get(f'{_type}Url'): subtitles[_type] = [{ - 'url': urljoin(base_url, data['%sUrl' % _type]), + 'url': urljoin(base_url, data[f'{_type}Url']), 'ext': 'vtt', }] @@ -126,7 +126,7 @@ class ZoomIE(InfoExtractor): 'format_id': 'view', 'ext': 'mp4', 'filesize_approx': parse_filesize(str_or_none(traverse_obj(data, ('recording', 'fileSizeInMB')))), - 'preference': 0 + 'preference': 0, }) if data.get('shareMp4Url'): @@ -137,7 +137,7 @@ class ZoomIE(InfoExtractor): 'height': int_or_none(traverse_obj(data, ('shareResolvtions', 1))), 'format_id': 'share', 'ext': 'mp4', - 'preference': -1 + 'preference': -1, }) view_with_share_url = data.get('viewMp4WithshareUrl') @@ -149,7 +149,7 @@ class ZoomIE(InfoExtractor): 'url': view_with_share_url, 'format_id': 'view_with_share', 'ext': 'mp4', - 'preference': 1 + 'preference': 1, }) return { diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py index 8d3156d..bfa3fc4 100644 --- a/yt_dlp/extractor/zype.py +++ b/yt_dlp/extractor/zype.py @@ -14,7 +14,7 @@ from ..utils import ( class ZypeIE(InfoExtractor): _ID_RE = r'[\da-fA-F]+' _COMMON_RE = 
r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)=' - _VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE)) + _VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % (f'(?P<id>{_ID_RE})')) _EMBED_REGEX = [fr'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?{_COMMON_RE % _ID_RE}.+?)\1'] _TEST = { 'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false', @@ -84,11 +84,11 @@ class ZypeIE(InfoExtractor): def get_attr(key): return self._search_regex( - r'\b%s\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1' % key, + rf'\b{key}\s*:\s*([\'"])(?P<val>(?:(?!\1).)+)\1', source, key, group='val') if get_attr('integration') == 'verizon-media': - m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id') + m3u8_url = 'https://content.uplynk.com/{}.m3u8'.format(get_attr('id')) formats, subtitles = self._extract_m3u8_formats_and_subtitles( m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') text_tracks = self._search_regex( diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index bda3fb4..a0f3289 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -190,7 +190,7 @@ class Debugger: cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion) raise if cls.ENABLED and stmt.strip(): - if should_ret or not repr(ret) == stmt: + if should_ret or repr(ret) != stmt: cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion) return ret, should_ret return interpret_statement @@ -216,7 +216,7 @@ class JSInterpreter: self.code, self._functions = code, {} self._objects = {} if objects is None else objects - class Exception(ExtractorError): + class Exception(ExtractorError): # noqa: A001 def __init__(self, msg, expr=None, *args, **kwargs): if expr is not None: msg = f'{msg.rstrip()} in: {truncate_string(expr, 50, 50)}' @@ -235,7 +235,7 @@ class JSInterpreter: flags = 0 if not expr: return flags, 
expr - for idx, ch in enumerate(expr): + for idx, ch in enumerate(expr): # noqa: B007 if ch not in cls._RE_FLAGS: break flags |= cls._RE_FLAGS[ch] @@ -474,7 +474,7 @@ class JSInterpreter: if remaining.startswith('{'): body, expr = self._separate_at_paren(remaining) else: - switch_m = re.match(r'switch\s*\(', remaining) # FIXME + switch_m = re.match(r'switch\s*\(', remaining) # FIXME: ? if switch_m: switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:]) body, expr = self._separate_at_paren(remaining, '}') @@ -585,9 +585,9 @@ class JSInterpreter: return int(expr), should_return elif expr == 'break': - raise JS_Break() + raise JS_Break elif expr == 'continue': - raise JS_Continue() + raise JS_Continue elif expr == 'undefined': return JS_Undefined, should_return elif expr == 'NaN': @@ -667,12 +667,12 @@ class JSInterpreter: self.interpret_expression(v, local_vars, allow_recursion) for v in self._separate(arg_str)] - if obj == str: + if obj is str: if member == 'fromCharCode': assertion(argvals, 'takes one or more arguments') return ''.join(map(chr, argvals)) raise self.Exception(f'Unsupported String method {member}', expr) - elif obj == float: + elif obj is float: if member == 'pow': assertion(len(argvals) == 2, 'takes two arguments') return argvals[0] ** argvals[1] @@ -697,12 +697,12 @@ class JSInterpreter: elif member == 'splice': assertion(isinstance(obj, list), 'must be applied on a list') assertion(argvals, 'takes one or more arguments') - index, howMany = map(int, (argvals + [len(obj)])[:2]) + index, how_many = map(int, ([*argvals, len(obj)])[:2]) if index < 0: index += len(obj) add_items = argvals[2:] res = [] - for i in range(index, min(index + howMany, len(obj))): + for _ in range(index, min(index + how_many, len(obj))): res.append(obj.pop(index)) for i, item in enumerate(add_items): obj.insert(index + i, item) @@ -726,12 +726,12 @@ class JSInterpreter: elif member == 'forEach': assertion(argvals, 'takes one or more arguments') 
assertion(len(argvals) <= 2, 'takes at-most 2 arguments') - f, this = (argvals + [''])[:2] + f, this = ([*argvals, ''])[:2] return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)] elif member == 'indexOf': assertion(argvals, 'takes one or more arguments') assertion(len(argvals) <= 2, 'takes at-most 2 arguments') - idx, start = (argvals + [0])[:2] + idx, start = ([*argvals, 0])[:2] try: return obj.index(idx, start) except ValueError: diff --git a/yt_dlp/networking/__init__.py b/yt_dlp/networking/__init__.py index 356712c..1eaa0ee 100644 --- a/yt_dlp/networking/__init__.py +++ b/yt_dlp/networking/__init__.py @@ -30,7 +30,7 @@ except Exception as e: warnings.warn(f'Failed to import "websockets" request handler: {e}' + bug_reports_message()) try: - from . import _curlcffi # noqa: F401 + from . import _curlcffi except ImportError: pass except Exception as e: diff --git a/yt_dlp/networking/_curlcffi.py b/yt_dlp/networking/_curlcffi.py index f2df399..b1f0fb8 100644 --- a/yt_dlp/networking/_curlcffi.py +++ b/yt_dlp/networking/_curlcffi.py @@ -207,7 +207,7 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): impersonate=self._SUPPORTED_IMPERSONATE_TARGET_MAP.get( self._get_request_target(request)), interface=self.source_address, - stream=True + stream=True, ) except curl_cffi.requests.errors.RequestsError as e: if e.code == CurlECode.PEER_FAILED_VERIFICATION: diff --git a/yt_dlp/networking/_helper.py b/yt_dlp/networking/_helper.py index 8e678b2..fe3354e 100644 --- a/yt_dlp/networking/_helper.py +++ b/yt_dlp/networking/_helper.py @@ -235,7 +235,7 @@ def create_socks_proxy_socket(dest_addr, proxy_args, proxy_ip_addr, timeout, sou connect_proxy_args = proxy_args.copy() connect_proxy_args.update({'addr': sa[0], 'port': sa[1]}) sock.setproxy(**connect_proxy_args) - if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: # noqa: E721 + if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: sock.settimeout(timeout) if source_address: 
sock.bind(source_address) @@ -251,7 +251,7 @@ def create_connection( timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None, *, - _create_socket_func=_socket_connect + _create_socket_func=_socket_connect, ): # Work around socket.create_connection() which tries all addresses from getaddrinfo() including IPv6. # This filters the addresses based on the given source_address. diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 6397a2c..86850c1 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import contextlib import functools import http.client @@ -21,8 +23,8 @@ urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.s if urllib3_version < (1, 26, 17): raise ImportError('Only urllib3 >= 1.26.17 is supported') -if requests.__build__ < 0x023100: - raise ImportError('Only requests >= 2.31.0 is supported') +if requests.__build__ < 0x023202: + raise ImportError('Only requests >= 2.32.2 is supported') import requests.adapters import requests.utils @@ -58,13 +60,13 @@ from .exceptions import ( from ..socks import ProxyError as SocksProxyError SUPPORTED_ENCODINGS = [ - 'gzip', 'deflate' + 'gzip', 'deflate', ] if brotli is not None: SUPPORTED_ENCODINGS.append('br') -""" +''' Override urllib3's behavior to not convert lower-case percent-encoded characters to upper-case during url normalization process. @@ -79,7 +81,7 @@ is best to avoid it in requests too for compatability reasons. 
1: https://tools.ietf.org/html/rfc3986#section-2.1 2: https://github.com/streamlink/streamlink/pull/4003 -""" +''' class Urllib3PercentREOverride: @@ -96,7 +98,7 @@ class Urllib3PercentREOverride: # urllib3 >= 1.25.8 uses subn: # https://github.com/urllib3/urllib3/commit/a2697e7c6b275f05879b60f593c5854a816489f0 -import urllib3.util.url # noqa: E305 +import urllib3.util.url if hasattr(urllib3.util.url, 'PERCENT_RE'): urllib3.util.url.PERCENT_RE = Urllib3PercentREOverride(urllib3.util.url.PERCENT_RE) @@ -105,7 +107,7 @@ elif hasattr(urllib3.util.url, '_PERCENT_RE'): # urllib3 >= 2.0.0 else: warnings.warn('Failed to patch PERCENT_RE in urllib3 (does the attribute exist?)' + bug_reports_message()) -""" +''' Workaround for issue in urllib.util.ssl_.py: ssl_wrap_context does not pass server_hostname to SSLContext.wrap_socket if server_hostname is an IP, however this is an issue because we set check_hostname to True in our SSLContext. @@ -114,7 +116,7 @@ Monkey-patching IS_SECURETRANSPORT forces ssl_wrap_context to pass server_hostna This has been fixed in urllib3 2.0+. See: https://github.com/urllib3/urllib3/issues/517 -""" +''' if urllib3_version < (2, 0, 0): with contextlib.suppress(Exception): @@ -135,7 +137,7 @@ class RequestsResponseAdapter(Response): self._requests_response = res - def read(self, amt: int = None): + def read(self, amt: int | None = None): try: # Interact with urllib3 response directly. 
return self.fp.read(amt, decode_content=True) @@ -182,14 +184,9 @@ class RequestsHTTPAdapter(requests.adapters.HTTPAdapter): return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs) # Skip `requests` internal verification; we use our own SSLContext - # requests 2.31.0+ def cert_verify(*args, **kwargs): pass - # requests 2.31.0-2.32.1 - def _get_connection(self, request, *_, proxies=None, **__): - return self.get_connection(request.url, proxies) - # requests 2.32.2+: Reimplementation without `_urllib3_request_context` def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None): url = urllib3.util.parse_url(request.url).url @@ -233,9 +230,7 @@ class Urllib3LoggingFilter(logging.Filter): def filter(self, record): # Ignore HTTP request messages since HTTPConnection prints those - if record.msg == '%s://%s:%s "%s %s %s" %s %s': - return False - return True + return record.msg != '%s://%s:%s "%s %s %s" %s %s' class Urllib3LoggingHandler(logging.Handler): @@ -334,7 +329,7 @@ class RequestsRH(RequestHandler, InstanceStoreMixin): timeout=self._calculate_timeout(request), proxies=self._get_proxies(request), allow_redirects=True, - stream=True + stream=True, ) except requests.exceptions.TooManyRedirects as e: @@ -416,7 +411,7 @@ class SocksProxyManager(urllib3.PoolManager): super().__init__(num_pools, headers, **connection_pool_kw) self.pool_classes_by_scheme = { 'http': SocksHTTPConnectionPool, - 'https': SocksHTTPSConnectionPool + 'https': SocksHTTPSConnectionPool, } diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py index ff110dc..6299582 100644 --- a/yt_dlp/networking/_urllib.py +++ b/yt_dlp/networking/_urllib.py @@ -246,8 +246,8 @@ class ProxyHandler(urllib.request.BaseHandler): def __init__(self, proxies=None): self.proxies = proxies # Set default handlers - for type in ('http', 'https', 'ftp'): - setattr(self, '%s_open' % type, lambda r, meth=self.proxy_open: meth(r)) + for scheme in ('http', 
'https', 'ftp'): + setattr(self, f'{scheme}_open', lambda r, meth=self.proxy_open: meth(r)) def proxy_open(self, req): proxy = select_proxy(req.get_full_url(), self.proxies) @@ -385,12 +385,12 @@ class UrllibRH(RequestHandler, InstanceStoreMixin): url=request.url, data=request.data, headers=dict(headers), - method=request.method + method=request.method, ) opener = self._get_instance( proxies=self._get_proxies(request), - cookiejar=self._get_cookiejar(request) + cookiejar=self._get_cookiejar(request), ) try: res = opener.open(urllib_req, timeout=self._calculate_timeout(request)) diff --git a/yt_dlp/networking/_websockets.py b/yt_dlp/networking/_websockets.py index 6e235b0..15db4fe 100644 --- a/yt_dlp/networking/_websockets.py +++ b/yt_dlp/networking/_websockets.py @@ -1,6 +1,7 @@ from __future__ import annotations import contextlib +import functools import io import logging import ssl @@ -22,7 +23,6 @@ from .exceptions import ( TransportError, ) from .websocket import WebSocketRequestHandler, WebSocketResponse -from ..compat import functools from ..dependencies import websockets from ..socks import ProxyError as SocksProxyError from ..utils import int_or_none @@ -137,7 +137,7 @@ class WebsocketsRH(WebSocketRequestHandler): wsuri = parse_uri(request.url) create_conn_kwargs = { 'source_address': (self.source_address, 0) if self.source_address else None, - 'timeout': timeout + 'timeout': timeout, } proxy = select_proxy(request.url, self._get_proxies(request)) try: @@ -147,12 +147,12 @@ class WebsocketsRH(WebSocketRequestHandler): address=(socks_proxy_options['addr'], socks_proxy_options['port']), _create_socket_func=functools.partial( create_socks_proxy_socket, (wsuri.host, wsuri.port), socks_proxy_options), - **create_conn_kwargs + **create_conn_kwargs, ) else: sock = create_connection( address=(wsuri.host, wsuri.port), - **create_conn_kwargs + **create_conn_kwargs, ) conn = websockets.sync.client.connect( sock=sock, diff --git a/yt_dlp/networking/common.py 
b/yt_dlp/networking/common.py index d473e16..a6db167 100644 --- a/yt_dlp/networking/common.py +++ b/yt_dlp/networking/common.py @@ -83,8 +83,8 @@ class RequestDirector: rh: sum(pref(rh, request) for pref in self.preferences) for rh in self.handlers.values() } - self._print_verbose('Handler preferences for this request: %s' % ', '.join( - f'{rh.RH_NAME}={pref}' for rh, pref in preferences.items())) + self._print_verbose('Handler preferences for this request: {}'.format(', '.join( + f'{rh.RH_NAME}={pref}' for rh, pref in preferences.items()))) return sorted(self.handlers.values(), key=preferences.get, reverse=True) def _print_verbose(self, msg): @@ -224,11 +224,11 @@ class RequestHandler(abc.ABC): headers: HTTPHeaderDict = None, cookiejar: YoutubeDLCookieJar = None, timeout: float | int | None = None, - proxies: dict = None, - source_address: str = None, + proxies: dict | None = None, + source_address: str | None = None, verbose: bool = False, prefer_system_certs: bool = False, - client_cert: dict[str, str | None] = None, + client_cert: dict[str, str | None] | None = None, verify: bool = True, legacy_ssl_support: bool = False, **_, @@ -341,7 +341,7 @@ class RequestHandler(abc.ABC): """Handle a request from start to finish. 
Redefine in subclasses.""" pass - def close(self): + def close(self): # noqa: B027 pass @classproperty @@ -378,11 +378,11 @@ class Request: self, url: str, data: RequestData = None, - headers: typing.Mapping = None, - proxies: dict = None, - query: dict = None, - method: str = None, - extensions: dict = None + headers: typing.Mapping | None = None, + proxies: dict | None = None, + query: dict | None = None, + method: str | None = None, + extensions: dict | None = None, ): self._headers = HTTPHeaderDict() @@ -508,8 +508,8 @@ class Response(io.IOBase): url: str, headers: Mapping[str, str], status: int = 200, - reason: str = None, - extensions: dict = None + reason: str | None = None, + extensions: dict | None = None, ): self.fp = fp @@ -527,7 +527,7 @@ class Response(io.IOBase): def readable(self): return self.fp.readable() - def read(self, amt: int = None) -> bytes: + def read(self, amt: int | None = None) -> bytes: # Expected errors raised here should be of type RequestError or subclasses. # Subclasses should redefine this method with more precise error handling. 
try: diff --git a/yt_dlp/networking/exceptions.py b/yt_dlp/networking/exceptions.py index 9037f18..daa9fb5 100644 --- a/yt_dlp/networking/exceptions.py +++ b/yt_dlp/networking/exceptions.py @@ -13,7 +13,7 @@ class RequestError(YoutubeDLError): self, msg: str | None = None, cause: Exception | str | None = None, - handler: RequestHandler = None + handler: RequestHandler = None, ): self.handler = handler self.cause = cause diff --git a/yt_dlp/networking/impersonate.py b/yt_dlp/networking/impersonate.py index ca66180..0626b3b 100644 --- a/yt_dlp/networking/impersonate.py +++ b/yt_dlp/networking/impersonate.py @@ -112,8 +112,8 @@ class ImpersonateRequestHandler(RequestHandler, ABC): return supported_target @classproperty - def supported_targets(self) -> tuple[ImpersonateTarget, ...]: - return tuple(self._SUPPORTED_IMPERSONATE_TARGET_MAP.keys()) + def supported_targets(cls) -> tuple[ImpersonateTarget, ...]: + return tuple(cls._SUPPORTED_IMPERSONATE_TARGET_MAP.keys()) def is_supported_target(self, target: ImpersonateTarget): assert isinstance(target, ImpersonateTarget) @@ -127,7 +127,7 @@ class ImpersonateRequestHandler(RequestHandler, ABC): headers = self._merge_headers(request.headers) if self._get_request_target(request) is not None: # remove all headers present in std_headers - # todo: change this to not depend on std_headers + # TODO: change this to not depend on std_headers for k, v in std_headers.items(): if headers.get(k) == v: headers.pop(k) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 9615bfb..1b18575 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -40,7 +40,7 @@ from .utils import ( from .version import CHANNEL, __version__ -def parseOpts(overrideArguments=None, ignore_config_files='if_override'): +def parseOpts(overrideArguments=None, ignore_config_files='if_override'): # noqa: N803 PACKAGE_NAME = 'yt-dlp' root = Config(create_parser()) @@ -264,7 +264,7 @@ def create_parser(): except Exception as err: raise 
optparse.OptionValueError(f'wrong {opt_str} formatting; {err}') for key in keys: - out_dict[key] = out_dict.get(key, []) + [val] if append else val + out_dict[key] = [*out_dict.get(key, []), val] if append else val setattr(parser.values, option.dest, out_dict) def when_prefix(default): @@ -474,14 +474,14 @@ def create_parser(): 'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress', 'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date', - 'prefer-legacy-http-handler', 'manifest-filesize-approx' + 'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext', }, 'aliases': { 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'], '2021': ['2022', 'no-certifi', 'filename-sanitization'], '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'], '2023': [], - } + }, }, help=( 'Options that can help keep compatibility with youtube-dl or youtube-dlc ' 'configurations by reverting some of the changes made in yt-dlp. ' @@ -541,7 +541,7 @@ def create_parser(): network.add_option( '--enable-file-urls', action='store_true', dest='enable_file_urls', default=False, - help='Enable file:// URLs. This is disabled by default for security reasons.' + help='Enable file:// URLs. This is disabled by default for security reasons.', ) geo = optparse.OptionGroup(parser, 'Geo-restriction') @@ -646,7 +646,7 @@ def create_parser(): 'You can also simply specify a field to match if the field is present, ' 'use "!field" to check if the field is not present, and "&" to check multiple conditions. ' 'Use a "\\" to escape "&" or quotes if needed. 
If used multiple times, ' - 'the filter matches if atleast one of the conditions are met. E.g. --match-filter ' + 'the filter matches if at least one of the conditions is met. E.g. --match-filter ' '!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" ' 'matches only videos that are not live OR those that have a like count more than 100 ' '(or the like field is not available) and also has a description ' @@ -682,7 +682,7 @@ def create_parser(): help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it') selection.add_option( '--no-download-archive', - dest='download_archive', action="store_const", const=None, + dest='download_archive', action='store_const', const=None, help='Do not use archive file (default)') selection.add_option( '--max-downloads', @@ -1046,7 +1046,7 @@ def create_parser(): callback_kwargs={ 'allowed_keys': 'http|ftp|m3u8|dash|rtsp|rtmp|mms', 'default_key': 'default', - 'process': str.strip + 'process': str.strip, }, help=( 'Name or path of the external downloader to use (optionally) prefixed by ' 'the protocols (http, ftp, m3u8, dash, rstp, rtmp, mms) to use it for. ' @@ -1060,9 +1060,9 @@ def create_parser(): metavar='NAME:ARGS', dest='external_downloader_args', default={}, type='str', action='callback', callback=_dict_from_options_callback, callback_kwargs={ - 'allowed_keys': r'ffmpeg_[io]\d*|%s' % '|'.join(map(re.escape, list_external_downloaders())), + 'allowed_keys': r'ffmpeg_[io]\d*|{}'.format('|'.join(map(re.escape, list_external_downloaders()))), 'default_key': 'default', - 'process': shlex.split + 'process': shlex.split, }, help=( 'Give these arguments to the external downloader. ' 'Specify the downloader name and the arguments separated by a colon ":". 
' @@ -1251,7 +1251,7 @@ def create_parser(): action='callback', callback=_dict_from_options_callback, callback_kwargs={ 'allowed_keys': '(download|postprocess)(-title)?', - 'default_key': 'download' + 'default_key': 'download', }, help=( 'Template for progress outputs, optionally prefixed with one of "download:" (default), ' '"download-title:" (the console title), "postprocess:", or "postprocess-title:". ' @@ -1317,8 +1317,8 @@ def create_parser(): metavar='[TYPES:]PATH', dest='paths', default={}, type='str', action='callback', callback=_dict_from_options_callback, callback_kwargs={ - 'allowed_keys': 'home|temp|%s' % '|'.join(map(re.escape, OUTTMPL_TYPES.keys())), - 'default_key': 'home' + 'allowed_keys': 'home|temp|{}'.format('|'.join(map(re.escape, OUTTMPL_TYPES.keys()))), + 'default_key': 'home', }, help=( 'The paths where the files should be downloaded. ' 'Specify the type of file and the path separated by a colon ":". ' @@ -1333,7 +1333,7 @@ def create_parser(): action='callback', callback=_dict_from_options_callback, callback_kwargs={ 'allowed_keys': '|'.join(map(re.escape, OUTTMPL_TYPES.keys())), - 'default_key': 'default' + 'default_key': 'default', }, help='Output filename template; see "OUTPUT TEMPLATE" for details') filesystem.add_option( '--output-na-placeholder', @@ -1479,7 +1479,7 @@ def create_parser(): 'Optionally, the KEYRING used for decrypting Chromium cookies on Linux, ' 'the name/path of the PROFILE to load cookies from, ' 'and the CONTAINER name (if Firefox) ("none" for no container) ' - 'can be given with their respective seperators. ' + 'can be given with their respective separators. ' 'By default, all containers of the most recently accessed profile are used. 
' f'Currently supported keyrings are: {", ".join(map(str.lower, sorted(SUPPORTED_KEYRINGS)))}')) filesystem.add_option( @@ -1575,7 +1575,7 @@ def create_parser(): 'allowed_keys': r'\w+(?:\+\w+)?', 'default_key': 'default-compat', 'process': shlex.split, - 'multiple_keys': False + 'multiple_keys': False, }, help=( 'Give these arguments to the postprocessors. ' 'Specify the postprocessor/executable name and the arguments separated by a colon ":" ' @@ -1724,8 +1724,8 @@ def create_parser(): '--convert-subs', '--convert-sub', '--convert-subtitles', metavar='FORMAT', dest='convertsubtitles', default=None, help=( - 'Convert the subtitles to another format (currently supported: %s) ' - '(Alias: --convert-subtitles)' % ', '.join(sorted(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)))) + 'Convert the subtitles to another format (currently supported: {}) ' + '(Alias: --convert-subtitles)'.format(', '.join(sorted(FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS))))) postproc.add_option( '--convert-thumbnails', metavar='FORMAT', dest='convertthumbnails', default=None, @@ -1772,7 +1772,7 @@ def create_parser(): action='callback', callback=_list_from_options_callback, callback_kwargs={ 'delim': None, - 'process': lambda val: dict(_postprocessor_opts_parser(*val.split(':', 1))) + 'process': lambda val: dict(_postprocessor_opts_parser(*val.split(':', 1))), }, help=( 'The (case sensitive) name of plugin postprocessors to be enabled, ' 'and (optionally) arguments to be passed to it, separated by a colon ":". 
' @@ -1781,7 +1781,7 @@ def create_parser(): 'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), ' '"video" (after --format; before --print/--output), "before_dl" (before each video download), ' '"post_process" (after each video download; default), ' - '"after_move" (after moving video file to it\'s final locations), ' + '"after_move" (after moving video file to its final locations), ' '"after_video" (after downloading and processing all formats of a video), ' 'or "playlist" (at end of playlist). ' 'This option can be used multiple times to add different postprocessors')) @@ -1794,7 +1794,7 @@ def create_parser(): dest='sponsorblock_mark', default=set(), action='callback', type='str', callback=_set_from_options_callback, callback_kwargs={ 'allowed_values': SponsorBlockPP.CATEGORIES.keys(), - 'aliases': {'default': ['all']} + 'aliases': {'default': ['all']}, }, help=( 'SponsorBlock categories to create chapters for, separated by commas. ' f'Available categories are {", ".join(SponsorBlockPP.CATEGORIES.keys())}, all and default (=all). ' @@ -1808,7 +1808,7 @@ def create_parser(): # Note: From https://wiki.sponsor.ajay.app/w/Types: # The filler category is very aggressive. # It is strongly recommended to not use this in a client by default. - 'aliases': {'default': ['all', '-filler']} + 'aliases': {'default': ['all', '-filler']}, }, help=( 'SponsorBlock categories to be removed from the video file, separated by commas. ' 'If a category is present in both mark and remove, remove takes precedence. 
' @@ -1879,7 +1879,7 @@ def create_parser(): extractor.add_option( '--hls-split-discontinuity', dest='hls_split_discontinuity', action='store_true', default=False, - help='Split HLS playlists to different formats at discontinuities such as ad breaks' + help='Split HLS playlists to different formats at discontinuities such as ad breaks', ) extractor.add_option( '--no-hls-split-discontinuity', @@ -1894,7 +1894,7 @@ def create_parser(): callback_kwargs={ 'multiple_keys': False, 'process': lambda val: dict( - _extractor_arg_parser(*arg.split('=', 1)) for arg in val.split(';')) + _extractor_arg_parser(*arg.split('=', 1)) for arg in val.split(';')), }, help=( 'Pass ARGS arguments to the IE_KEY extractor. See "EXTRACTOR ARGUMENTS" for details. ' 'You can use this option multiple times to give arguments for different extractors')) diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index bfe9df7..164540b 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -43,5 +43,5 @@ def get_postprocessor(key): globals().update(_PLUGIN_CLASSES) -__all__ = [name for name in globals().keys() if name.endswith('PP')] +__all__ = [name for name in globals() if name.endswith('PP')] __all__.extend(('PostProcessor', 'FFmpegPostProcessor')) diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index 8cef86c..eeeece8 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -65,7 +65,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass): def to_screen(self, text, prefix=True, *args, **kwargs): if self._downloader: - tag = '[%s] ' % self.PP_NAME if prefix else '' + tag = f'[{self.PP_NAME}] ' if prefix else '' return self._downloader.to_screen(f'{tag}{text}', *args, **kwargs) def report_warning(self, text, *args, **kwargs): @@ -127,7 +127,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass): if allowed[format_type]: return func(self, info) else: - self.to_screen('Skipping %s' 
% format_type) + self.to_screen(f'Skipping {format_type}') return [], info return wrapper return decorator @@ -174,7 +174,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass): self._progress_hooks.append(ph) def report_progress(self, s): - s['_default_template'] = '%(postprocessor)s %(status)s' % s + s['_default_template'] = '%(postprocessor)s %(status)s' % s # noqa: UP031 if not self._downloader: return diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 9c53729..f2228ac 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -13,7 +13,6 @@ from ..utils import ( check_executable, encodeArgument, encodeFilename, - error_to_compat_str, prepend_extension, shell_quote, ) @@ -48,7 +47,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): if mobj is None: return guess() except PostProcessingError as err: - self.report_warning('unable to find the thumbnail resolution; %s' % error_to_compat_str(err)) + self.report_warning(f'unable to find the thumbnail resolution; {err}') return guess() return int(mobj.group('w')), int(mobj.group('h')) @@ -104,12 +103,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor): old_stream, new_stream = self.get_stream_number( filename, ('tags', 'mimetype'), mimetype) if old_stream is not None: - options.extend(['-map', '-0:%d' % old_stream]) + options.extend(['-map', f'-0:{old_stream}']) new_stream -= 1 options.extend([ '-attach', self._ffmpeg_filename_argument(thumbnail_filename), - '-metadata:s:%d' % new_stream, 'mimetype=%s' % mimetype, - '-metadata:s:%d' % new_stream, 'filename=cover.%s' % thumbnail_ext]) + f'-metadata:s:{new_stream}', f'mimetype={mimetype}', + f'-metadata:s:{new_stream}', f'filename=cover.{thumbnail_ext}']) self._report_run('ffmpeg', filename) self.run_ffmpeg(filename, temp_filename, options) @@ -120,19 +119,26 @@ class EmbedThumbnailPP(FFmpegPostProcessor): if not mutagen or prefer_atomicparsley: success = False else: + 
self._report_run('mutagen', filename) + f = {'jpeg': MP4Cover.FORMAT_JPEG, 'png': MP4Cover.FORMAT_PNG} try: - self._report_run('mutagen', filename) + with open(thumbnail_filename, 'rb') as thumbfile: + thumb_data = thumbfile.read() + + type_ = imghdr.what(h=thumb_data) + if not type_: + raise ValueError('could not determine image type') + elif type_ not in f: + raise ValueError(f'incompatible image type: {type_}') + meta = MP4(filename) # NOTE: the 'covr' atom is a non-standard MPEG-4 atom, # Apple iTunes 'M4A' files include the 'moov.udta.meta.ilst' atom. - f = {'jpeg': MP4Cover.FORMAT_JPEG, 'png': MP4Cover.FORMAT_PNG}[imghdr.what(thumbnail_filename)] - with open(thumbnail_filename, 'rb') as thumbfile: - thumb_data = thumbfile.read() meta.tags['covr'] = [MP4Cover(data=thumb_data, imageformat=f)] meta.save() temp_filename = filename except Exception as err: - self.report_warning('unable to embed using mutagen; %s' % error_to_compat_str(err)) + self.report_warning(f'unable to embed using mutagen; {err}') success = False # Method 2: Use AtomicParsley @@ -157,13 +163,14 @@ class EmbedThumbnailPP(FFmpegPostProcessor): cmd += [encodeArgument(o) for o in self._configuration_args('AtomicParsley')] self._report_run('atomicparsley', filename) - self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd)) + self.write_debug(f'AtomicParsley command line: {shell_quote(cmd)}') stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) if returncode: self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}') + success = False # for formats that don't support thumbnails (like 3gp) AtomicParsley # won't create to the temporary file - if 'No changes' in stdout: + elif 'No changes' in stdout: self.report_warning('The file format doesn\'t support embedding a thumbnail') success = False @@ -178,9 +185,9 @@ class EmbedThumbnailPP(FFmpegPostProcessor): old_stream, new_stream = self.get_stream_number( 
filename, ('disposition', 'attached_pic'), 1) if old_stream is not None: - options.extend(['-map', '-0:%d' % old_stream]) + options.extend(['-map', f'-0:{old_stream}']) new_stream -= 1 - options.extend(['-disposition:%s' % new_stream, 'attached_pic']) + options.extend([f'-disposition:{new_stream}', 'attached_pic']) self._report_run('ffmpeg', filename) self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) @@ -196,7 +203,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): f = {'opus': OggOpus, 'flac': FLAC, 'ogg': OggVorbis}[info['ext']](filename) pic = Picture() - pic.mime = 'image/%s' % imghdr.what(thumbnail_filename) + pic.mime = f'image/{imghdr.what(thumbnail_filename)}' with open(thumbnail_filename, 'rb') as thumbfile: pic.data = thumbfile.read() pic.type = 3 # front cover diff --git a/yt_dlp/postprocessor/exec.py b/yt_dlp/postprocessor/exec.py index c2e73fb..1f0a001 100644 --- a/yt_dlp/postprocessor/exec.py +++ b/yt_dlp/postprocessor/exec.py @@ -1,6 +1,5 @@ from .common import PostProcessor -from ..compat import compat_shlex_quote -from ..utils import Popen, PostProcessingError, variadic +from ..utils import Popen, PostProcessingError, shell_quote, variadic class ExecPP(PostProcessor): @@ -19,7 +18,7 @@ class ExecPP(PostProcessor): if filepath: if '{}' not in cmd: cmd += ' {}' - cmd = cmd.replace('{}', compat_shlex_quote(filepath)) + cmd = cmd.replace('{}', shell_quote(filepath)) return cmd def run(self, info): diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 7d7f3f0..164c46d 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -1,5 +1,6 @@ import collections import contextvars +import functools import itertools import json import os @@ -8,7 +9,7 @@ import subprocess import time from .common import PostProcessor -from ..compat import functools, imghdr +from ..compat import imghdr from ..utils import ( MEDIA_EXTENSIONS, ISO639Utils, @@ -61,7 +62,7 @@ ACODECS = { def 
create_mapping_re(supported): - return re.compile(r'{0}(?:/{0})*$'.format(r'(?:\s*\w+\s*>)?\s*(?:%s)\s*' % '|'.join(supported))) + return re.compile(r'{0}(?:/{0})*$'.format(r'(?:\s*\w+\s*>)?\s*(?:{})\s*'.format('|'.join(supported)))) def resolve_mapping(source, mapping): @@ -119,7 +120,7 @@ class FFmpegPostProcessor(PostProcessor): filename = os.path.basename(location) basename = next((p for p in programs if p in filename), 'ffmpeg') dirname = os.path.dirname(os.path.abspath(location)) - if basename in self._ffmpeg_to_avconv.keys(): + if basename in self._ffmpeg_to_avconv: self._prefer_ffmpeg = True paths = {p: os.path.join(dirname, p) for p in programs} @@ -169,12 +170,12 @@ class FFmpegPostProcessor(PostProcessor): @functools.cached_property def basename(self): - self._version # run property + _ = self._version # run property return self.basename @functools.cached_property def probe_basename(self): - self._probe_version # run property + _ = self._probe_version # run property return self.probe_basename def _get_version(self, kind): @@ -342,7 +343,7 @@ class FFmpegPostProcessor(PostProcessor): cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')] def make_args(file, args, name, number): - keys = ['_%s%d' % (name, number), '_%s' % name] + keys = [f'_{name}{number}', f'_{name}'] if name == 'o': args += ['-movflags', '+faststart'] if number == 1: @@ -359,7 +360,7 @@ class FFmpegPostProcessor(PostProcessor): make_args(path, list(opts), arg_type, i + 1) for i, (path, opts) in enumerate(path_opts) if path) - self.write_debug('ffmpeg command line: %s' % shell_quote(cmd)) + self.write_debug(f'ffmpeg command line: {shell_quote(cmd)}') _, stderr, returncode = Popen.run( cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) if returncode not in variadic(expected_retcodes): @@ -437,7 +438,7 @@ class FFmpegPostProcessor(PostProcessor): class FFmpegExtractAudioPP(FFmpegPostProcessor): - COMMON_AUDIO_EXTS = 
MEDIA_EXTENSIONS.common_audio + ('wma', ) + COMMON_AUDIO_EXTS = (*MEDIA_EXTENSIONS.common_audio, 'wma') SUPPORTED_EXTS = tuple(ACODECS.keys()) FORMAT_RE = create_mapping_re(('best', *SUPPORTED_EXTS)) @@ -474,7 +475,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): acodec_opts = [] else: acodec_opts = ['-acodec', codec] - opts = ['-vn'] + acodec_opts + more_opts + opts = ['-vn', *acodec_opts, *more_opts] try: FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts) except FFmpegPostProcessorError as err: @@ -523,7 +524,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): temp_path = prepend_extension(path, 'temp') if (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)) and os.path.exists(encodeFilename(orig_path))): - self.to_screen('Post-process file %s exists, skipping' % new_path) + self.to_screen(f'Post-process file {new_path} exists, skipping') return [], information self.to_screen(f'Destination: {new_path}') @@ -641,7 +642,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): if not sub_langs: return [], info - input_files = [filename] + sub_filenames + input_files = [filename, *sub_filenames] opts = [ *self.stream_copy_opts(ext=info['ext']), @@ -650,15 +651,15 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): '-map', '-0:s', ] for i, (lang, name) in enumerate(zip(sub_langs, sub_names)): - opts.extend(['-map', '%d:0' % (i + 1)]) + opts.extend(['-map', f'{i + 1}:0']) lang_code = ISO639Utils.short2long(lang) or lang - opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) + opts.extend([f'-metadata:s:s:{i}', f'language={lang_code}']) if name: - opts.extend(['-metadata:s:s:%d' % i, 'handler_name=%s' % name, - '-metadata:s:s:%d' % i, 'title=%s' % name]) + opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}', + f'-metadata:s:s:{i}', f'title={name}']) temp_filename = prepend_extension(filename, 'temp') - self.to_screen('Embedding subtitles in "%s"' % filename) + self.to_screen(f'Embedding subtitles in "{filename}"') 
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) os.replace(temp_filename, filename) @@ -707,7 +708,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): return [], info temp_filename = prepend_extension(filename, 'temp') - self.to_screen('Adding metadata to "%s"' % filename) + self.to_screen(f'Adding metadata to "{filename}"') self.run_ffmpeg_multiple_files( (filename, metadata_filename), temp_filename, itertools.chain(self._options(info['ext']), *options)) @@ -728,7 +729,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000) chapter_title = chapter.get('title') if chapter_title: - metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title) + metadata_file_content += f'title={ffmpeg_escape(chapter_title)}\n' f.write(metadata_file_content) yield ('-map_metadata', '1') @@ -738,7 +739,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): def add(meta_list, info_list=None): value = next(( - info[key] for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list)) + info[key] for key in [f'{meta_prefix}_', *variadic(info_list or meta_list)] if info.get(key) is not None), None) if value not in ('', None): value = ', '.join(map(str, variadic(value))) @@ -807,7 +808,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') if old_stream is not None: - yield ('-map', '-0:%d' % old_stream) + yield ('-map', f'-0:{old_stream}') new_stream -= 1 yield ( @@ -834,8 +835,8 @@ class FFmpegMergerPP(FFmpegPostProcessor): args.extend([f'-bsf:a:{audio_streams}', 'aac_adtstoasc']) audio_streams += 1 if fmt.get('vcodec') != 'none': - args.extend(['-map', '%u:v:0' % (i)]) - self.to_screen('Merging formats into "%s"' % filename) + args.extend(['-map', f'{i}:v:0']) + self.to_screen(f'Merging formats into "{filename}"') self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args) 
os.rename(encodeFilename(temp_filename), encodeFilename(filename)) return info['__files_to_merge'], info @@ -848,10 +849,9 @@ class FFmpegMergerPP(FFmpegPostProcessor): required_version = '10-0' if is_outdated_version( self._versions[self.basename], required_version): - warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, ' + warning = (f'Your copy of {self.basename} is outdated and unable to properly mux separate video and audio files, ' 'yt-dlp will download single file media. ' - 'Update %s to version %s or newer to fix this.') % ( - self.basename, self.basename, required_version) + f'Update {self.basename} to version {required_version} or newer to fix this.') self.report_warning(warning) return False return True @@ -873,7 +873,7 @@ class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor): stretched_ratio = info.get('stretched_ratio') if stretched_ratio not in (None, 1): self._fixup('Fixing aspect ratio', info['filepath'], [ - *self.stream_copy_opts(), '-aspect', '%f' % stretched_ratio]) + *self.stream_copy_opts(), '-aspect', f'{stretched_ratio:f}']) return [], info @@ -925,7 +925,7 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor): opts = ['-vf', 'setpts=PTS-STARTPTS'] else: opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS'] - self._fixup('Fixing frame timestamp', info['filepath'], opts + [*self.stream_copy_opts(False), '-ss', self.trim]) + self._fixup('Fixing frame timestamp', info['filepath'], [*opts, *self.stream_copy_opts(False), '-ss', self.trim]) return [], info @@ -970,7 +970,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): continue ext = sub['ext'] if ext == new_ext: - self.to_screen('Subtitle file for %s is already in the requested format' % new_ext) + self.to_screen(f'Subtitle file for {new_ext} is already in the requested format') continue elif ext == 'json': self.to_screen( @@ -1060,7 +1060,7 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor): in_file = info['filepath'] if 
self._force_keyframes and len(chapters) > 1: in_file = self.force_keyframes(in_file, (c['start_time'] for c in chapters)) - self.to_screen('Splitting video by chapters; %d chapters found' % len(chapters)) + self.to_screen(f'Splitting video by chapters; {len(chapters)} chapters found') for idx, chapter in enumerate(chapters): destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info) self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())]) @@ -1087,7 +1087,7 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor): _, thumbnail_ext = os.path.splitext(thumbnail_filename) if thumbnail_ext: if thumbnail_ext.lower() != '.webp' and imghdr.what(thumbnail_filename) == 'webp': - self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename) + self.to_screen(f'Correcting thumbnail "{thumbnail_filename}" extension to webp') webp_filename = replace_extension(thumbnail_filename, 'webp') os.replace(thumbnail_filename, webp_filename) info['thumbnails'][idx]['filepath'] = webp_filename diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index f521986..d82685e 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -54,7 +54,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): self.write_debug('Expected and actual durations mismatch') concat_opts = self._make_concat_opts(cuts, real_duration) - self.write_debug('Concat spec = %s' % ', '.join(f'{c.get("inpoint", 0.0)}-{c.get("outpoint", "inf")}' for c in concat_opts)) + self.write_debug('Concat spec = {}'.format(', '.join(f'{c.get("inpoint", 0.0)}-{c.get("outpoint", "inf")}' for c in concat_opts))) def remove_chapters(file, is_sub): return file, self.remove_chapters(file, cuts, concat_opts, self._force_keyframes and not is_sub) diff --git a/yt_dlp/postprocessor/movefilesafterdownload.py b/yt_dlp/postprocessor/movefilesafterdownload.py index 23b0924..35e8705 100644 --- 
a/yt_dlp/postprocessor/movefilesafterdownload.py +++ b/yt_dlp/postprocessor/movefilesafterdownload.py @@ -34,16 +34,15 @@ class MoveFilesAfterDownloadPP(PostProcessor): if os.path.abspath(encodeFilename(oldfile)) == os.path.abspath(encodeFilename(newfile)): continue if not os.path.exists(encodeFilename(oldfile)): - self.report_warning('File "%s" cannot be found' % oldfile) + self.report_warning(f'File "{oldfile}" cannot be found') continue if os.path.exists(encodeFilename(newfile)): if self.get_param('overwrites', True): - self.report_warning('Replacing existing file "%s"' % newfile) + self.report_warning(f'Replacing existing file "{newfile}"') os.remove(encodeFilename(newfile)) else: self.report_warning( - 'Cannot move file "%s" out of temporary directory since "%s" already exists. ' - % (oldfile, newfile)) + f'Cannot move file "{oldfile}" out of temporary directory since "{newfile}" already exists. ') continue make_dir(newfile, PostProcessingError) self.to_screen(f'Moving file "{oldfile}" to "{newfile}"') diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index ff50d5b..525b639 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -35,7 +35,7 @@ class SponSkrubPP(PostProcessor): if not ignoreerror and self.path is None: if path: - raise PostProcessingError('sponskrub not found in "%s"' % path) + raise PostProcessingError(f'sponskrub not found in "{path}"') else: raise PostProcessingError('sponskrub not found. 
Please install or provide the path using --sponskrub-path') @@ -83,7 +83,7 @@ class SponSkrubPP(PostProcessor): cmd += ['--', information['id'], filename, temp_filename] cmd = [encodeArgument(i) for i in cmd] - self.write_debug('sponskrub command line: %s' % shell_quote(cmd)) + self.write_debug(f'sponskrub command line: {shell_quote(cmd)}') stdout, _, returncode = Popen.run(cmd, text=True, stdout=None if self.get_param('verbose') else subprocess.PIPE) if not returncode: diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py index 6ba87cd..6cf9ab6 100644 --- a/yt_dlp/postprocessor/sponsorblock.py +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -27,7 +27,7 @@ class SponsorBlockPP(FFmpegPostProcessor): 'filler': 'Filler Tangent', 'interaction': 'Interaction Reminder', 'music_offtopic': 'Non-Music Section', - **NON_SKIPPABLE_CATEGORIES + **NON_SKIPPABLE_CATEGORIES, } def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'): @@ -57,7 +57,7 @@ class SponsorBlockPP(FFmpegPostProcessor): if start_end[0] <= 1: start_end[0] = 0 # Make POI chapters 1 sec so that we can properly mark them - if s['category'] in self.POI_CATEGORIES.keys(): + if s['category'] in self.POI_CATEGORIES: start_end[1] += 1 # Ignore milliseconds difference at the end. # Never allow the segment to exceed the video. @@ -91,12 +91,12 @@ class SponsorBlockPP(FFmpegPostProcessor): return sponsor_chapters def _get_sponsor_segments(self, video_id, service): - hash = hashlib.sha256(video_id.encode('ascii')).hexdigest() + video_hash = hashlib.sha256(video_id.encode('ascii')).hexdigest() # SponsorBlock API recommends using first 4 hash characters. - url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + urllib.parse.urlencode({ + url = f'{self._API_URL}/api/skipSegments/{video_hash[:4]}?' 
+ urllib.parse.urlencode({ 'service': service, 'categories': json.dumps(self._categories), - 'actionTypes': json.dumps(['skip', 'poi', 'chapter']) + 'actionTypes': json.dumps(['skip', 'poi', 'chapter']), }) for d in self._download_json(url) or []: if d['videoID'] == video_id: diff --git a/yt_dlp/socks.py b/yt_dlp/socks.py index b4957ac..e553a5f 100644 --- a/yt_dlp/socks.py +++ b/yt_dlp/socks.py @@ -60,8 +60,8 @@ class ProxyError(OSError): class InvalidVersionError(ProxyError): def __init__(self, expected_version, got_version): - msg = ('Invalid response version from server. Expected {:02x} got ' - '{:02x}'.format(expected_version, got_version)) + msg = (f'Invalid response version from server. Expected {expected_version:02x} got ' + f'{got_version:02x}') super().__init__(0, msg) @@ -71,7 +71,7 @@ class Socks4Error(ProxyError): CODES = { 91: 'request rejected or failed', 92: 'request rejected because SOCKS server cannot connect to identd on the client', - 93: 'request rejected because the client program and identd report different user-ids' + 93: 'request rejected because the client program and identd report different user-ids', } @@ -88,7 +88,7 @@ class Socks5Error(ProxyError): 0x07: 'Command not supported', 0x08: 'Address type not supported', 0xFE: 'unknown username or invalid password', - 0xFF: 'all offered authentication methods were rejected' + 0xFF: 'all offered authentication methods were rejected', } diff --git a/yt_dlp/update.py b/yt_dlp/update.py index ca70f69..8c6790d 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -13,7 +13,7 @@ from dataclasses import dataclass from zipimport import zipimporter from .compat import functools # isort: split -from .compat import compat_realpath, compat_shlex_quote +from .compat import compat_realpath from .networking import Request from .networking.exceptions import HTTPError, network_exceptions from .utils import ( @@ -200,7 +200,7 @@ class UpdateInfo: requested_version: str | None = None commit: str | None = None 
- binary_name: str | None = _get_binary_name() + binary_name: str | None = _get_binary_name() # noqa: RUF009: Always returns the same value checksum: str | None = None _has_update = True @@ -381,7 +381,7 @@ class Updater: has_update = False resolved_tag = requested_version if self.requested_tag == 'latest' else self.requested_tag - current_label = _make_label(self._origin, self._channel.partition("@")[2] or self.current_version, self.current_version) + current_label = _make_label(self._origin, self._channel.partition('@')[2] or self.current_version, self.current_version) requested_label = _make_label(self.requested_repo, resolved_tag, requested_version) latest_or_requested = f'{"Latest" if self.requested_tag == "latest" else "Requested"} version: {requested_label}' if not has_update: @@ -515,7 +515,7 @@ class Updater: os.chmod(self.filename, mask) except OSError: return self._report_error( - f'Unable to set permissions. Run: sudo chmod a+rx {compat_shlex_quote(self.filename)}') + f'Unable to set permissions. 
Run: sudo chmod a+rx {shell_quote(self.filename)}') self.ydl.to_screen(f'Updated yt-dlp to {update_label}') return True @@ -559,7 +559,7 @@ class Updater: tag = self.requested_tag self._report_error( f'Unable to {action}{delim} visit https://github.com/{self.requested_repo}/releases/' - + tag if tag == "latest" else f"tag/{tag}", True) + + tag if tag == 'latest' else f'tag/{tag}', True) # XXX: Everything below this line in this class is deprecated / for compat only @property diff --git a/yt_dlp/utils/_legacy.py b/yt_dlp/utils/_legacy.py index a23248b..356e580 100644 --- a/yt_dlp/utils/_legacy.py +++ b/yt_dlp/utils/_legacy.py @@ -17,7 +17,7 @@ from ..networking._urllib import HTTPHandler # isort: split from .networking import escape_rfc3986 # noqa: F401 -from .networking import normalize_url as escape_url # noqa: F401 +from .networking import normalize_url as escape_url from .networking import random_user_agent, std_headers # noqa: F401 from ..cookies import YoutubeDLCookieJar # noqa: F401 from ..networking._urllib import PUTRequest # noqa: F401 @@ -167,7 +167,7 @@ def decode_png(png_data): chunks.append({ 'type': chunk_type, 'length': length, - 'data': chunk_data + 'data': chunk_data, }) ihdr = chunks[0]['data'] @@ -195,15 +195,15 @@ def decode_png(png_data): return pixels[y][x] for y in range(height): - basePos = y * (1 + stride) - filter_type = decompressed_data[basePos] + base_pos = y * (1 + stride) + filter_type = decompressed_data[base_pos] current_row = [] pixels.append(current_row) for x in range(stride): - color = decompressed_data[1 + basePos + x] + color = decompressed_data[1 + base_pos + x] basex = y * stride + x left = 0 up = 0 diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 42803bb..b5e1e29 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -53,7 +53,7 @@ from ..compat import ( ) from ..dependencies import xattr -__name__ = __name__.rsplit('.', 1)[0] # Pretend to be the parent module +__name__ = __name__.rsplit('.', 
1)[0] # noqa: A001: Pretend to be the parent module # This is not clearly defined otherwise compiled_regex_type = type(re.compile('')) @@ -90,7 +90,7 @@ TIMEZONE_NAMES = { 'EST': -5, 'EDT': -4, # Eastern 'CST': -6, 'CDT': -5, # Central 'MST': -7, 'MDT': -6, # Mountain - 'PST': -8, 'PDT': -7 # Pacific + 'PST': -8, 'PDT': -7, # Pacific } # needed for sanitizing filenames in restricted mode @@ -215,7 +215,7 @@ def write_json_file(obj, fn): def find_xpath_attr(node, xpath, key, val=None): """ Find the xpath xpath[@key=val] """ assert re.match(r'^[a-zA-Z_-]+$', key) - expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']") + expr = xpath + (f'[@{key}]' if val is None else f"[@{key}='{val}']") return node.find(expr) # On python2.6 the xml.etree.ElementTree.Element methods don't support @@ -230,7 +230,7 @@ def xpath_with_ns(path, ns_map): replaced.append(c[0]) else: ns, tag = c - replaced.append('{%s}%s' % (ns_map[ns], tag)) + replaced.append(f'{{{ns_map[ns]}}}{tag}') return '/'.join(replaced) @@ -251,7 +251,7 @@ def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT): return default elif fatal: name = xpath if name is None else name - raise ExtractorError('Could not find XML element %s' % name) + raise ExtractorError(f'Could not find XML element {name}') else: return None return n @@ -266,7 +266,7 @@ def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT): return default elif fatal: name = xpath if name is None else name - raise ExtractorError('Could not find XML element\'s text %s' % name) + raise ExtractorError(f'Could not find XML element\'s text {name}') else: return None return n.text @@ -279,7 +279,7 @@ def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT): return default elif fatal: name = f'{xpath}[@{key}]' if name is None else name - raise ExtractorError('Could not find XML attribute %s' % name) + raise ExtractorError(f'Could not find XML attribute {name}') else: return None return 
n.attrib[key] @@ -320,14 +320,14 @@ def get_element_html_by_attribute(attribute, value, html, **kargs): def get_elements_by_class(class_name, html, **kargs): """Return the content of all tags with the specified class in the passed HTML document as a list""" return get_elements_by_attribute( - 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name), + 'class', rf'[^\'"]*(?<=[\'"\s]){re.escape(class_name)}(?=[\'"\s])[^\'"]*', html, escape_value=False) def get_elements_html_by_class(class_name, html): """Return the html of all tags with the specified class in the passed HTML document as a list""" return get_elements_html_by_attribute( - 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name), + 'class', rf'[^\'"]*(?<=[\'"\s]){re.escape(class_name)}(?=[\'"\s])[^\'"]*', html, escape_value=False) @@ -364,7 +364,7 @@ def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w yield ( unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)), - whole + whole, ) @@ -407,7 +407,7 @@ class HTMLBreakOnClosingTagParser(html.parser.HTMLParser): else: raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found') if not self.tagstack: - raise self.HTMLBreakOnClosingTagException() + raise self.HTMLBreakOnClosingTagException # XXX: This should be far less strict @@ -587,7 +587,7 @@ def sanitize_open(filename, open_mode): # FIXME: An exclusive lock also locks the file from being read. # Since windows locks are mandatory, don't lock the file on windows (for now). 
# Ref: https://github.com/yt-dlp/yt-dlp/issues/3124 - raise LockingUnsupportedError() + raise LockingUnsupportedError stream = locked_file(filename, open_mode, block=False).__enter__() except OSError: stream = open(filename, open_mode) @@ -717,9 +717,9 @@ def extract_basic_auth(url): return url, None url = urllib.parse.urlunsplit(parts._replace(netloc=( parts.hostname if parts.port is None - else '%s:%d' % (parts.hostname, parts.port)))) + else f'{parts.hostname}:{parts.port}'))) auth_payload = base64.b64encode( - ('%s:%s' % (parts.username, parts.password or '')).encode()) + ('{}:{}'.format(parts.username, parts.password or '')).encode()) return url, f'Basic {auth_payload.decode()}' @@ -758,7 +758,7 @@ def _htmlentity_transform(entity_with_semicolon): numstr = mobj.group(1) if numstr.startswith('x'): base = 16 - numstr = '0%s' % numstr + numstr = f'0{numstr}' else: base = 10 # See https://github.com/ytdl-org/youtube-dl/issues/7518 @@ -766,7 +766,7 @@ def _htmlentity_transform(entity_with_semicolon): return chr(int(numstr, base)) # Unknown entity in name, return its literal representation - return '&%s;' % entity + return f'&{entity};' def unescapeHTML(s): @@ -970,7 +970,7 @@ class ExtractorError(YoutubeDLError): class UnsupportedError(ExtractorError): def __init__(self, url): super().__init__( - 'Unsupported URL: %s' % url, expected=True) + f'Unsupported URL: {url}', expected=True) self.url = url @@ -1367,7 +1367,7 @@ class DateRange: else: self.end = dt.datetime.max.date() if self.start > self.end: - raise ValueError('Date range: "%s" , the start date must be before the end date' % self) + raise ValueError(f'Date range: "{self}" , the start date must be before the end date') @classmethod def day(cls, day): @@ -1400,7 +1400,7 @@ def system_identifier(): with contextlib.suppress(OSError): # We may not have access to the executable libc_ver = platform.libc_ver() - return 'Python %s (%s %s %s) - %s (%s%s)' % ( + return 'Python {} ({} {} {}) - {} ({}{})'.format( 
platform.python_version(), python_implementation, platform.machine(), @@ -1413,7 +1413,7 @@ def system_identifier(): @functools.cache def get_windows_version(): - ''' Get Windows version. returns () if it's not running on Windows ''' + """ Get Windows version. returns () if it's not running on Windows """ if compat_os_name == 'nt': return version_tuple(platform.win32_ver()[1]) else: @@ -1505,7 +1505,7 @@ if sys.platform == 'win32': ctypes.wintypes.DWORD, # dwReserved ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh - ctypes.POINTER(OVERLAPPED) # Overlapped + ctypes.POINTER(OVERLAPPED), # Overlapped ] LockFileEx.restype = ctypes.wintypes.BOOL UnlockFileEx = kernel32.UnlockFileEx @@ -1514,7 +1514,7 @@ if sys.platform == 'win32': ctypes.wintypes.DWORD, # dwReserved ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh - ctypes.POINTER(OVERLAPPED) # Overlapped + ctypes.POINTER(OVERLAPPED), # Overlapped ] UnlockFileEx.restype = ctypes.wintypes.BOOL whole_low = 0xffffffff @@ -1537,7 +1537,7 @@ if sys.platform == 'win32': assert f._lock_file_overlapped_p handle = msvcrt.get_osfhandle(f.fileno()) if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p): - raise OSError('Unlocking file failed: %r' % ctypes.FormatError()) + raise OSError(f'Unlocking file failed: {ctypes.FormatError()!r}') else: try: @@ -1564,10 +1564,10 @@ else: except ImportError: def _lock_file(f, exclusive, block): - raise LockingUnsupportedError() + raise LockingUnsupportedError def _unlock_file(f): - raise LockingUnsupportedError() + raise LockingUnsupportedError class locked_file: @@ -1926,7 +1926,7 @@ def remove_end(s, end): def remove_quotes(s): if s is None or len(s) < 2: return s - for quote in ('"', "'", ): + for quote in ('"', "'"): if s[0] == quote and s[-1] == quote: return s[1:-1] return s @@ -2085,26 +2085,27 @@ def parse_duration(s): (days, 86400), (hours, 3600), (mins, 60), 
(secs, 1), (ms, 1))) -def prepend_extension(filename, ext, expected_real_ext=None): +def _change_extension(prepend, filename, ext, expected_real_ext=None): name, real_ext = os.path.splitext(filename) - return ( - f'{name}.{ext}{real_ext}' - if not expected_real_ext or real_ext[1:] == expected_real_ext - else f'{filename}.{ext}') + if not expected_real_ext or real_ext[1:] == expected_real_ext: + filename = name + if prepend and real_ext: + _UnsafeExtensionError.sanitize_extension(ext, prepend=True) + return f'{filename}.{ext}{real_ext}' -def replace_extension(filename, ext, expected_real_ext=None): - name, real_ext = os.path.splitext(filename) - return '{}.{}'.format( - name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename, - ext) + return f'{filename}.{_UnsafeExtensionError.sanitize_extension(ext)}' + + +prepend_extension = functools.partial(_change_extension, True) +replace_extension = functools.partial(_change_extension, False) def check_executable(exe, args=[]): """ Checks if the given binary is installed somewhere in PATH, and returns its name. args can be a list of arguments for a short output (like -version) """ try: - Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + Popen.run([exe, *args], stdout=subprocess.PIPE, stderr=subprocess.PIPE) except OSError: return False return exe @@ -2115,7 +2116,7 @@ def _get_exe_version_output(exe, args): # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # SIGTTOU if yt-dlp is run in the background. 
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656 - stdout, _, ret = Popen.run([encodeArgument(exe)] + args, text=True, + stdout, _, ret = Popen.run([encodeArgument(exe), *args], text=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) if ret: return None @@ -2161,7 +2162,7 @@ class LazyList(collections.abc.Sequence): """Lazy immutable list from an iterable Note that slices of a LazyList are lists and not LazyList""" - class IndexError(IndexError): + class IndexError(IndexError): # noqa: A001 pass def __init__(self, iterable, *, reverse=False, _cache=None): @@ -2248,7 +2249,7 @@ class LazyList(collections.abc.Sequence): class PagedList: - class IndexError(IndexError): + class IndexError(IndexError): # noqa: A001 pass def __len__(self): @@ -2282,7 +2283,7 @@ class PagedList: raise TypeError('indices must be non-negative integers') entries = self.getslice(idx, idx + 1) if not entries: - raise self.IndexError() + raise self.IndexError return entries[0] def __bool__(self): @@ -2443,7 +2444,7 @@ class PlaylistEntries: except IndexError: entry = self.MissingEntry if not self.is_incomplete: - raise self.IndexError() + raise self.IndexError if entry is self.MissingEntry: raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found') return entry @@ -2452,7 +2453,7 @@ class PlaylistEntries: try: return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i) except (LazyList.IndexError, PagedList.IndexError): - raise self.IndexError() + raise self.IndexError return get_entry def __getitem__(self, idx): @@ -2488,7 +2489,7 @@ class PlaylistEntries: def __len__(self): return len(tuple(self[:])) - class IndexError(IndexError): + class IndexError(IndexError): # noqa: A001 pass @@ -2550,7 +2551,7 @@ def update_url(url, *, query_update=None, **kwargs): assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time' kwargs['query'] = urllib.parse.urlencode({ 
**urllib.parse.parse_qs(url.query), - **query_update + **query_update, }, True) return urllib.parse.urlunparse(url._replace(**kwargs)) @@ -2560,7 +2561,7 @@ def update_url_query(url, query): def _multipart_encode_impl(data, boundary): - content_type = 'multipart/form-data; boundary=%s' % boundary + content_type = f'multipart/form-data; boundary={boundary}' out = b'' for k, v in data.items(): @@ -2582,7 +2583,7 @@ def _multipart_encode_impl(data, boundary): def multipart_encode(data, boundary=None): - ''' + """ Encode a dict to RFC 7578-compliant form-data data: @@ -2593,7 +2594,7 @@ def multipart_encode(data, boundary=None): a random boundary is generated. Reference: https://tools.ietf.org/html/rfc7578 - ''' + """ has_specified_boundary = boundary is not None while True: @@ -2688,7 +2689,7 @@ def parse_age_limit(s): s = s.upper() if s in US_RATINGS: return US_RATINGS[s] - m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s) + m = re.match(r'^TV[_-]?({})$'.format('|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)), s) if m: return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)] return None @@ -2736,7 +2737,7 @@ def js_to_json(code, vars={}, *, strict=False): return v elif v in ('undefined', 'void 0'): return 'null' - elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',': + elif v.startswith(('/*', '//', '!')) or v == ',': return '' if v[0] in STRING_QUOTES: @@ -3079,7 +3080,7 @@ def urlhandle_detect_ext(url_handle, default=NO_DEFAULT): def encode_data_uri(data, mime_type): - return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii')) + return 'data:{};base64,{}'.format(mime_type, base64.b64encode(data).decode('ascii')) def age_restricted(content_limit, age_limit): @@ -3144,18 +3145,18 @@ def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False): def get_max_lens(table): return [max(width(str(v)) for v in col) for col in zip(*table)] - def filter_using_list(row, filterArray): - 
return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take] + def filter_using_list(row, filter_array): + return [col for take, col in itertools.zip_longest(filter_array, row, fillvalue=True) if take] max_lens = get_max_lens(data) if hide_empty else [] header_row = filter_using_list(header_row, max_lens) data = [filter_using_list(row, max_lens) for row in data] - table = [header_row] + data + table = [header_row, *data] max_lens = get_max_lens(table) extra_gap += 1 if delim: - table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data + table = [header_row, [delim * (ml + extra_gap) for ml in max_lens], *data] table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter for row in table: for pos, text in enumerate(map(str, row)): @@ -3163,8 +3164,7 @@ def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False): row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap else: row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap) - ret = '\n'.join(''.join(row).rstrip() for row in table) - return ret + return '\n'.join(''.join(row).rstrip() for row in table) def _match_one(filter_part, dct, incomplete): @@ -3191,12 +3191,12 @@ def _match_one(filter_part, dct, incomplete): operator_rex = re.compile(r'''(?x) (?P<key>[a-z_]+) - \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* + \s*(?P<negation>!\s*)?(?P<op>{})(?P<none_inclusive>\s*\?)?\s* (?: (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)| (?P<strval>.+?) 
) - ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) + '''.format('|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))) m = operator_rex.fullmatch(filter_part.strip()) if m: m = m.groupdict() @@ -3207,7 +3207,7 @@ def _match_one(filter_part, dct, incomplete): op = unnegated_op comparison_value = m['quotedstrval'] or m['strval'] or m['intval'] if m['quote']: - comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote']) + comparison_value = comparison_value.replace(r'\{}'.format(m['quote']), m['quote']) actual_value = dct.get(m['key']) numeric_comparison = None if isinstance(actual_value, (int, float)): @@ -3224,7 +3224,7 @@ def _match_one(filter_part, dct, incomplete): if numeric_comparison is None: numeric_comparison = parse_duration(comparison_value) if numeric_comparison is not None and m['op'] in STRING_OPERATORS: - raise ValueError('Operator %s only supports string values!' % m['op']) + raise ValueError('Operator {} only supports string values!'.format(m['op'])) if actual_value is None: return is_incomplete(m['key']) or m['none_inclusive'] return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison) @@ -3234,8 +3234,8 @@ def _match_one(filter_part, dct, incomplete): '!': lambda v: (v is False) if isinstance(v, bool) else (v is None), } operator_rex = re.compile(r'''(?x) - (?P<op>%s)\s*(?P<key>[a-z_]+) - ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys()))) + (?P<op>{})\s*(?P<key>[a-z_]+) + '''.format('|'.join(map(re.escape, UNARY_OPERATORS.keys())))) m = operator_rex.fullmatch(filter_part.strip()) if m: op = UNARY_OPERATORS[m.group('op')] @@ -3244,7 +3244,7 @@ def _match_one(filter_part, dct, incomplete): return True return op(actual_value) - raise ValueError('Invalid filter part %r' % filter_part) + raise ValueError(f'Invalid filter part {filter_part!r}') def match_str(filter_str, dct, incomplete=False): @@ -3351,10 +3351,10 @@ def ass_subtitles_timecode(seconds): def dfxp2srt(dfxp_data): - 
''' + """ @param dfxp_data A bytes-like object containing DFXP data @returns A unicode object containing converted SRT data - ''' + """ LEGACY_NAMESPACES = ( (b'http://www.w3.org/ns/ttml', [ b'http://www.w3.org/2004/11/ttaf1', @@ -3372,7 +3372,7 @@ def dfxp2srt(dfxp_data): 'fontSize', 'fontStyle', 'fontWeight', - 'textDecoration' + 'textDecoration', ] _x = functools.partial(xpath_with_ns, ns_map={ @@ -3410,11 +3410,11 @@ def dfxp2srt(dfxp_data): if self._applied_styles and self._applied_styles[-1].get(k) == v: continue if k == 'color': - font += ' color="%s"' % v + font += f' color="{v}"' elif k == 'fontSize': - font += ' size="%s"' % v + font += f' size="{v}"' elif k == 'fontFamily': - font += ' face="%s"' % v + font += f' face="{v}"' elif k == 'fontWeight' and v == 'bold': self._out += '<b>' unclosed_elements.append('b') @@ -3438,7 +3438,7 @@ def dfxp2srt(dfxp_data): if tag not in (_x('ttml:br'), 'br'): unclosed_elements = self._unclosed_elements.pop() for element in reversed(unclosed_elements): - self._out += '</%s>' % element + self._out += f'</{element}>' if unclosed_elements and self._applied_styles: self._applied_styles.pop() @@ -4349,7 +4349,7 @@ def bytes_to_long(s): def ohdave_rsa_encrypt(data, exponent, modulus): - ''' + """ Implement OHDave's RSA algorithm. 
See http://www.ohdave.com/rsa/ Input: @@ -4358,11 +4358,11 @@ def ohdave_rsa_encrypt(data, exponent, modulus): Output: hex string of encrypted data Limitation: supports one block encryption only - ''' + """ payload = int(binascii.hexlify(data[::-1]), 16) encrypted = pow(payload, exponent, modulus) - return '%x' % encrypted + return f'{encrypted:x}' def pkcs1pad(data, length): @@ -4377,7 +4377,7 @@ def pkcs1pad(data, length): raise ValueError('Input data too long for PKCS#1 padding') pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)] - return [0, 2] + pseudo_random + [0] + data + return [0, 2, *pseudo_random, 0, *data] def _base_n_table(n, table): @@ -4710,16 +4710,14 @@ def jwt_encode_hs256(payload_data, key, headers={}): payload_b64 = base64.b64encode(json.dumps(payload_data).encode()) h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256) signature_b64 = base64.b64encode(h.digest()) - token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64 - return token + return header_b64 + b'.' + payload_b64 + b'.' 
+ signature_b64 # can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256 def jwt_decode_hs256(jwt): header_b64, payload_b64, signature_b64 = jwt.split('.') # add trailing ='s that may have been stripped, superfluous ='s are ignored - payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}===')) - return payload_data + return json.loads(base64.urlsafe_b64decode(f'{payload_b64}===')) WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None @@ -4797,7 +4795,7 @@ def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re): """ _keys = ('width', 'height') max_dimensions = max( - (tuple(format.get(k) or 0 for k in _keys) for format in formats), + (tuple(fmt.get(k) or 0 for k in _keys) for fmt in formats), default=(0, 0)) if not max_dimensions[0]: return thumbnails @@ -5040,6 +5038,101 @@ MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests) +class _UnsafeExtensionError(Exception): + """ + Mitigation exception for uncommon/malicious file extensions + This should be caught in YoutubeDL.py alongside a warning + + Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j + """ + ALLOWED_EXTENSIONS = frozenset([ + # internal + 'description', + 'json', + 'meta', + 'orig', + 'part', + 'temp', + 'uncut', + 'unknown_video', + 'ytdl', + + # video + *MEDIA_EXTENSIONS.video, + 'avif', + 'ismv', + 'm2ts', + 'm4s', + 'mng', + 'mpeg', + 'qt', + 'swf', + 'ts', + 'vp9', + 'wvm', + + # audio + *MEDIA_EXTENSIONS.audio, + 'isma', + 'mid', + 'mpga', + 'ra', + + # image + *MEDIA_EXTENSIONS.thumbnails, + 'bmp', + 'gif', + 'heic', + 'ico', + 'jng', + 'jpeg', + 'jxl', + 'svg', + 'tif', + 'wbmp', + + # subtitle + *MEDIA_EXTENSIONS.subtitles, + 'dfxp', + 'fs', + 'ismt', + 'sami', + 'scc', + 'ssa', + 'tt', + 'ttml', + + # others + *MEDIA_EXTENSIONS.manifests, + *MEDIA_EXTENSIONS.storyboards, 
+ 'desktop', + 'ism', + 'm3u', + 'sbv', + 'url', + 'webloc', + 'xml', + ]) + + def __init__(self, extension, /): + super().__init__(f'unsafe file extension: {extension!r}') + self.extension = extension + + @classmethod + def sanitize_extension(cls, extension, /, *, prepend=False): + if '/' in extension or '\\' in extension: + raise cls(extension) + + if not prepend: + _, _, last = extension.rpartition('.') + if last == 'bin': + extension = last = 'unknown_video' + if last.lower() not in cls.ALLOWED_EXTENSIONS: + raise cls(extension) + + return extension + + class RetryManager: """Usage: for retry in RetryManager(...): @@ -5193,7 +5286,7 @@ class FormatSorter: 'function': lambda it: next(filter(None, it), None)}, 'ext': {'type': 'combined', 'field': ('vext', 'aext')}, 'res': {'type': 'multiple', 'field': ('height', 'width'), - 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))}, + 'function': lambda it: min(filter(None, it), default=0)}, # Actual field names 'format_id': {'type': 'alias', 'field': 'id'}, @@ -5241,21 +5334,21 @@ class FormatSorter: self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is ' 'deprecated and may be removed in a future version') self.settings[field] = {} - propObj = self.settings[field] - if key not in propObj: - type = propObj.get('type') + prop_obj = self.settings[field] + if key not in prop_obj: + type_ = prop_obj.get('type') if key == 'field': - default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field + default = 'preference' if type_ == 'extractor' else (field,) if type_ in ('combined', 'multiple') else field elif key == 'convert': - default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore' + default = 'order' if type_ == 'ordered' else 'float_string' if field else 'ignore' else: - default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None) - propObj[key] = default - 
return propObj[key] + default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key) + prop_obj[key] = default + return prop_obj[key] - def _resolve_field_value(self, field, value, convertNone=False): + def _resolve_field_value(self, field, value, convert_none=False): if value is None: - if not convertNone: + if not convert_none: return None else: value = value.lower() @@ -5317,7 +5410,7 @@ class FormatSorter: for item in sort_list: match = re.match(self.regex, item) if match is None: - raise ExtractorError('Invalid format sort string "%s" given by extractor' % item) + raise ExtractorError(f'Invalid format sort string "{item}" given by extractor') field = match.group('field') if field is None: continue @@ -5345,31 +5438,31 @@ class FormatSorter: def print_verbose_info(self, write_debug): if self._sort_user: - write_debug('Sort order given by user: %s' % ', '.join(self._sort_user)) + write_debug('Sort order given by user: {}'.format(', '.join(self._sort_user))) if self._sort_extractor: - write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor)) - write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % ( + write_debug('Sort order given by extractor: {}'.format(', '.join(self._sort_extractor))) + write_debug('Formats sorted by: {}'.format(', '.join(['{}{}{}'.format( '+' if self._get_field_setting(field, 'reverse') else '', field, - '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':', - self._get_field_setting(field, 'limit_text'), - self._get_field_setting(field, 'limit')) + '{}{}({})'.format('~' if self._get_field_setting(field, 'closest') else ':', + self._get_field_setting(field, 'limit_text'), + self._get_field_setting(field, 'limit')) if self._get_field_setting(field, 'limit_text') is not None else '') - for field in self._order if self._get_field_setting(field, 'visible')])) + for field in self._order if self._get_field_setting(field, 'visible')]))) - def 
_calculate_field_preference_from_value(self, format, field, type, value): + def _calculate_field_preference_from_value(self, format_, field, type_, value): reverse = self._get_field_setting(field, 'reverse') closest = self._get_field_setting(field, 'closest') limit = self._get_field_setting(field, 'limit') - if type == 'extractor': + if type_ == 'extractor': maximum = self._get_field_setting(field, 'max') if value is None or (maximum is not None and value >= maximum): value = -1 - elif type == 'boolean': + elif type_ == 'boolean': in_list = self._get_field_setting(field, 'in_list') not_in_list = self._get_field_setting(field, 'not_in_list') value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1 - elif type == 'ordered': + elif type_ == 'ordered': value = self._resolve_field_value(field, value, True) # try to convert to number @@ -5385,17 +5478,17 @@ class FormatSorter: else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit else (-1, value, 0)) - def _calculate_field_preference(self, format, field): - type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple - get_value = lambda f: format.get(self._get_field_setting(f, 'field')) - if type == 'multiple': - type = 'field' # Only 'field' is allowed in multiple for now + def _calculate_field_preference(self, format_, field): + type_ = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple + get_value = lambda f: format_.get(self._get_field_setting(f, 'field')) + if type_ == 'multiple': + type_ = 'field' # Only 'field' is allowed in multiple for now actual_fields = self._get_field_setting(field, 'field') value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields) else: value = get_value(field) - return self._calculate_field_preference_from_value(format, field, type, value) + return self._calculate_field_preference_from_value(format_, 
field, type_, value) def calculate_preference(self, format): # Determine missing protocol diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py index 4b73252..933b164 100644 --- a/yt_dlp/utils/networking.py +++ b/yt_dlp/utils/networking.py @@ -112,7 +112,7 @@ def clean_proxies(proxies: dict, headers: HTTPHeaderDict): replace_scheme = { 'socks5': 'socks5h', # compat: socks5 was treated as socks5h - 'socks': 'socks4' # compat: non-standard + 'socks': 'socks4', # compat: non-standard } if proxy_scheme in replace_scheme: proxies[proxy_key] = urllib.parse.urlunparse( @@ -160,5 +160,5 @@ def normalize_url(url): path=escape_rfc3986(remove_dot_segments(url_parsed.path)), params=escape_rfc3986(url_parsed.params), query=escape_rfc3986(url_parsed.query), - fragment=escape_rfc3986(url_parsed.fragment) + fragment=escape_rfc3986(url_parsed.fragment), ).geturl() diff --git a/yt_dlp/version.py b/yt_dlp/version.py index a90b288..6e8fd3a 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.05.27' +__version__ = '2024.07.01' -RELEASE_GIT_HEAD = '12b248ce60be1aa1362edd839d915bba70dbee4b' +RELEASE_GIT_HEAD = '5ce582448ececb8d9c30c8c31f58330090ced03a' VARIANT = None @@ -12,4 +12,4 @@ CHANNEL = 'stable' ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.05.27' +_pkg_version = '2024.07.01' diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index 7683bfb..9f1a508 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -77,9 +77,8 @@ class _MatchChildParser(_MatchParser): class ParseError(Exception): def __init__(self, parser): - super().__init__("Parse error at position %u (near %r)" % ( - parser._pos, parser._data[parser._pos:parser._pos + 100] - )) + data = parser._data[parser._pos:parser._pos + 100] + super().__init__(f'Parse error at position {parser._pos} (near {data!r})') # While the specification <https://www.w3.org/TR/webvtt1/#webvtt-timestamp> @@ -149,7 +148,7 @@ class 
Magic(HeaderBlock): # XXX: The X-TIMESTAMP-MAP extension is described in RFC 8216 §3.5 # <https://tools.ietf.org/html/rfc8216#section-3.5>, but the RFC - # doesn’t specify the exact grammar nor where in the WebVTT + # doesn't specify the exact grammar nor where in the WebVTT # syntax it should be placed; the below has been devised based # on usage in the wild # @@ -273,10 +272,10 @@ class CueBlock(Block): def parse(cls, parser): parser = parser.child() - id = None + id_ = None m = parser.consume(cls._REGEX_ID) if m: - id = m.group(1) + id_ = m.group(1) m0 = parser.consume(_REGEX_TS) if not m0: @@ -304,9 +303,9 @@ class CueBlock(Block): parser.commit() return cls( - id=id, + id=id_, start=start, end=end, settings=settings, - text=text.getvalue() + text=text.getvalue(), ) def write_into(self, stream): @@ -343,7 +342,7 @@ class CueBlock(Block): start=json['start'], end=json['end'], text=json['text'], - settings=json['settings'] + settings=json['settings'], ) def hinges(self, other): |