diff options
Diffstat (limited to 'devscripts')
29 files changed, 2072 insertions, 0 deletions
diff --git a/devscripts/__init__.py b/devscripts/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/devscripts/__init__.py diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in new file mode 100644 index 0000000..21f5279 --- /dev/null +++ b/devscripts/bash-completion.in @@ -0,0 +1,29 @@ +__yt_dlp() +{ + local cur prev opts fileopts diropts keywords + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + prev="${COMP_WORDS[COMP_CWORD-1]}" + opts="{{flags}}" + keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" + fileopts="-a|--batch-file|--download-archive|--cookies|--load-info" + diropts="--cache-dir" + + if [[ ${prev} =~ ${fileopts} ]]; then + COMPREPLY=( $(compgen -f -- ${cur}) ) + return 0 + elif [[ ${prev} =~ ${diropts} ]]; then + COMPREPLY=( $(compgen -d -- ${cur}) ) + return 0 + fi + + if [[ ${cur} =~ : ]]; then + COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) ) + return 0 + elif [[ ${cur} == * ]] ; then + COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) + return 0 + fi +} + +complete -F __yt_dlp yt-dlp diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py new file mode 100755 index 0000000..9b4a9d4 --- /dev/null +++ b/devscripts/bash-completion.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import yt_dlp + +BASH_COMPLETION_FILE = "completions/bash/yt-dlp" +BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in" + + +def build_completion(opt_parser): + opts_flag = [] + for group in opt_parser.option_groups: + for option in group.option_list: + # for every long flag + opts_flag.append(option.get_opt_string()) + with open(BASH_COMPLETION_TEMPLATE) as f: + template = f.read() + with open(BASH_COMPLETION_FILE, "w") as f: + # just using the special char + filled_template = template.replace("{{flags}}", " ".join(opts_flag)) + f.write(filled_template) + + +parser = yt_dlp.parseOpts(ignore_config_files=True)[0] +build_completion(parser) diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json new file mode 100644 index 0000000..2a34ad0 --- /dev/null +++ b/devscripts/changelog_override.json @@ -0,0 +1,130 @@ +[ + { + "action": "add", + "when": "29cb20bd563c02671b31dd840139e93dd37150a1", + "short": "[priority] **A new release type has been added!**\n * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).\n * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).\n * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades).\n * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.\n * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`" + }, + { + "action": "add", + "when": "5038f6d713303e0967d002216e7a88652401c22a", + "short": "[priority] **YouTube throttling fixes!**" + }, + { + "action": "remove", + "when": "2e023649ea4e11151545a34dc1360c114981a236" + }, + { + "action": "add", + "when": "01aba2519a0884ef17d5f85608dbd2a455577147", + "short": "[priority] YouTube: Improved throttling and signature fixes" + }, + { + "action": "change", + "when": "c86e433c35fe5da6cb29f3539eef97497f84ed38", + "short": "[extractor/niconico:series] Fix extraction (#6898)", + "authors": ["sqrtNOT"] + }, + { + "action": "change", + "when": "69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2", + "short": "[extractor/youtube:music_search_url] Extract title (#7102)", + "authors": ["kangalio"] + }, + { + "action": "change", + "when": "8417f26b8a819cd7ffcd4e000ca3e45033e670fb", + "short": "Add option `--color` (#6904)", + "authors": ["Grub4K"] + }, + { + "action": "change", + "when": "b4e0d75848e9447cee2cd3646ce54d4744a7ff56", + "short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL", + "authors": ["pukkandan"] + }, + { + "action": "change", + "when": "1e75d97db21152acc764b30a688e516f04b8a142", + "short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher bit-rate 'premium' formats though they are not labeled as such", + "authors": ["pukkandan"] + }, + { + "action": "change", + "when": "f2ff0f6f1914b82d4a51681a72cc0828115dcb4a", + "short": "[extractor/motherless] Add gallery support, fix groups (#7211)", + "authors": ["rexlambert22", "Ti4eeT4e"] + }, + { + "action": "change", + "when": "a4486bfc1dc7057efca9dd3fe70d7fa25c56f700", + "short": "[misc] Revert \"Add automatic duplicate issue detection\"", + "authors": ["pukkandan"] + }, + { + "action": "add", + "when": "1ceb657bdd254ad961489e5060f2ccc7d556b729", + "short": "[priority] Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)\n - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains\n - Cookies are scoped when passed to external downloaders\n - Add `cookies` field to info.json and deprecate `http_headers.Cookie`" + }, + { + "action": "change", + "when": "b03fa7834579a01cc5fba48c0e73488a16683d48", + "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b", + "authors": ["pukkandan"] + }, + { + "action": "change", + "when": "fcd6a76adc49d5cd8783985c7ce35384b72e545f", + "short": "[test] Add tests for socks proxies (#7908)", + "authors": ["coletdjnz"] + }, + { + "action": "change", + "when": "4bf912282a34b58b6b35d8f7e6be535770c89c76", + "short": "[rh:urllib] Remove dot segments during URL normalization (#7662)", + "authors": ["coletdjnz"] + }, + { + "action": "change", + "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85", + "short": "[rh:urllib] Simplify gzip decoding (#7611)", + "authors": ["Grub4K"] + }, + { + "action": "add", + "when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b", + "short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)" + }, + { + "action": "add", + "when": "61bdf15fc7400601c3da1aa7a43917310a5bf391", + "short": "[priority] Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)\n - The shell escape function is now using `\"\"` instead of `\\\"`.\n - `utils.Popen` has been patched to properly quote commands." + }, + { + "action": "change", + "when": "8a8b54523addf46dfd50ef599761a81bc22362e6", + "short": "[rh:requests] Add handler for `requests` HTTP library (#3668)\n\n\tAdds support for HTTPS proxies and persistent connections (keep-alive)", + "authors": ["bashonly", "coletdjnz", "Grub4K"] + }, + { + "action": "add", + "when": "1d03633c5a1621b9f3a756f0a4f9dc61fab3aeaa", + "short": "[priority] **The release channels have been adjusted!**\n\t* [`master`](https://github.com/yt-dlp/yt-dlp-master-builds) builds are made after each push, containing the latest fixes (but also possibly bugs). This was previously the `nightly` channel.\n\t* [`nightly`](https://github.com/yt-dlp/yt-dlp-nightly-builds) builds are now made once a day, if there were any changes." + }, + { + "action": "add", + "when": "f04b5bedad7b281bee9814686bba1762bae092eb", + "short": "[priority] Security: [[CVE-2023-46121](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-46121)] Patch [Generic Extractor MITM Vulnerability via Arbitrary Proxy Injection](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x)\n\t- Disallow smuggling of arbitrary `http_headers`; extractors now only use specific headers" + }, + { + "action": "change", + "when": "15f22b4880b6b3f71f350c64d70976ae65b9f1ca", + "short": "[webvtt] Allow spaces before newlines for CueBlock (#7681)", + "authors": ["TSRBerry"] + }, + { + "action": "change", + "when": "4ce57d3b873c2887814cbec03d029533e82f7db5", + "short": "[ie] Support multi-period MPD streams (#6654)", + "authors": ["alard", "pukkandan"] + } +] diff --git a/devscripts/changelog_override.schema.json b/devscripts/changelog_override.schema.json new file mode 100644 index 0000000..9bd747b --- /dev/null +++ b/devscripts/changelog_override.schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft/2020-12/schema", + "type": "array", + "uniqueItems": true, + "items": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "action": { + "enum": [ + "add" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "short" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "remove" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + } + }, + "required": [ + "action", + "hash" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "change" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "hash", + "short", + "authors" + ] + } + ] + } +} diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py new file mode 100644 index 0000000..fc72c30 --- /dev/null +++ b/devscripts/check-porn.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +""" +This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check +if we are not 'age_limit' tagging some porn site + +A second approach implemented relies on a list of porn domains, to activate it +pass the list filename as the only argument +""" + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import urllib.parse +import urllib.request + +from test.helper import gettestcases + +if len(sys.argv) > 1: + METHOD = 'LIST' + LIST = open(sys.argv[1]).read().decode('utf8').strip() +else: + METHOD = 'EURISTIC' + +for test in gettestcases(): + if METHOD == 'EURISTIC': + try: + webpage = urllib.request.urlopen(test['url'], timeout=10).read() + except Exception: + print('\nFail: {}'.format(test['name'])) + continue + + webpage = webpage.decode('utf8', 'replace') + + RESULT = 'porn' in webpage.lower() + + elif METHOD == 'LIST': + domain = urllib.parse.urlparse(test['url']).netloc + if not domain: + print('\nFail: {}'.format(test['name'])) + continue + domain = '.'.join(domain.split('.')[-2:]) + + RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST) + + if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] + or test['info_dict']['age_limit'] != 18): + print('\nPotential missing age_limit check: {}'.format(test['name'])) + + elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] + and test['info_dict']['age_limit'] == 18): + print('\nPotential false negative: {}'.format(test['name'])) + + else: + sys.stdout.write('.') + sys.stdout.flush() + +print() diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py new file mode 100644 index 0000000..2aa51eb --- /dev/null +++ b/devscripts/cli_to_api.py @@ -0,0 +1,48 @@ +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import yt_dlp +import yt_dlp.options + +create_parser = yt_dlp.options.create_parser + + +def parse_patched_options(opts): + patched_parser = create_parser() + patched_parser.defaults.update({ + 'ignoreerrors': False, + 'retries': 0, + 'fragment_retries': 0, + 'extract_flat': False, + 'concat_playlist': 'never', + }) + yt_dlp.options.create_parser = lambda: patched_parser + try: + return yt_dlp.parse_options(opts) + finally: + yt_dlp.options.create_parser = create_parser + + +default_opts = parse_patched_options([]).ydl_opts + + +def cli_to_api(opts, cli_defaults=False): + opts = (yt_dlp.parse_options if cli_defaults else parse_patched_options)(opts).ydl_opts + + diff = {k: v for k, v in opts.items() if default_opts[k] != v} + if 'postprocessors' in diff: + diff['postprocessors'] = [pp for pp in diff['postprocessors'] + if pp not in default_opts['postprocessors']] + return diff + + +if __name__ == '__main__': + from pprint import pprint + + print('\nThe arguments passed translate to:\n') + pprint(cli_to_api(sys.argv[1:])) + print('\nCombining these with the CLI defaults gives:\n') + pprint(cli_to_api(sys.argv[1:], True)) diff --git a/devscripts/fish-completion.in b/devscripts/fish-completion.in new file mode 100644 index 0000000..32938fb --- /dev/null +++ b/devscripts/fish-completion.in @@ -0,0 +1,5 @@ + +{{commands}} + + +complete --command yt-dlp --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py new file mode 100755 index 0000000..5d2f68a --- /dev/null +++ b/devscripts/fish-completion.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import optparse + +import yt_dlp +from yt_dlp.utils import shell_quote + +FISH_COMPLETION_FILE = 'completions/fish/yt-dlp.fish' +FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in' + +EXTRA_ARGS = { + 'remux-video': ['--arguments', 'mp4 mkv', '--exclusive'], + 'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'], + + # Options that need a file parameter + 'download-archive': ['--require-parameter'], + 'cookies': ['--require-parameter'], + 'load-info': ['--require-parameter'], + 'batch-file': ['--require-parameter'], +} + + +def build_completion(opt_parser): + commands = [] + + for group in opt_parser.option_groups: + for option in group.option_list: + long_option = option.get_opt_string().strip('-') + complete_cmd = ['complete', '--command', 'yt-dlp', '--long-option', long_option] + if option._short_opts: + complete_cmd += ['--short-option', option._short_opts[0].strip('-')] + if option.help != optparse.SUPPRESS_HELP: + complete_cmd += ['--description', option.help] + complete_cmd.extend(EXTRA_ARGS.get(long_option, [])) + commands.append(shell_quote(complete_cmd)) + + with open(FISH_COMPLETION_TEMPLATE) as f: + template = f.read() + filled_template = template.replace('{{commands}}', '\n'.join(commands)) + with open(FISH_COMPLETION_FILE, 'w') as f: + f.write(filled_template) + + +parser = yt_dlp.parseOpts(ignore_config_files=True)[0] +build_completion(parser) diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py new file mode 100644 index 0000000..7f3c88b --- /dev/null +++ b/devscripts/generate_aes_testdata.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import codecs +import subprocess + +from yt_dlp.aes import aes_encrypt, key_expansion +from yt_dlp.utils import intlist_to_bytes + +secret_msg = b'Secret message goes here' + + +def hex_str(int_list): + return codecs.encode(intlist_to_bytes(int_list), 'hex') + + +def openssl_encode(algo, key, iv): + cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)] + prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + out, _ = prog.communicate(secret_msg) + return out + + +iv = key = [0x20, 0x15] + 14 * [0] + +r = openssl_encode('aes-128-cbc', key, iv) +print('aes_cbc_decrypt') +print(repr(r)) + +password = key +new_key = aes_encrypt(password, key_expansion(password)) +r = openssl_encode('aes-128-ctr', new_key, iv) +print('aes_decrypt_text 16') +print(repr(r)) + +password = key + 16 * [0] +new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16) +r = openssl_encode('aes-256-ctr', new_key, iv) +print('aes_decrypt_text 32') +print(repr(r)) diff --git a/devscripts/install_deps.py b/devscripts/install_deps.py new file mode 100755 index 0000000..889d9ab --- /dev/null +++ b/devscripts/install_deps.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +# Allow execution from anywhere +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import argparse +import re +import subprocess + +from devscripts.tomlparse import parse_toml +from devscripts.utils import read_file + + +def parse_args(): + parser = argparse.ArgumentParser(description='Install dependencies for yt-dlp') + parser.add_argument( + 'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)') + parser.add_argument( + '-e', '--exclude', metavar='DEPENDENCY', action='append', help='Exclude a dependency') + parser.add_argument( + '-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group') + parser.add_argument( + '-o', '--only-optional', action='store_true', help='Only install optional dependencies') + parser.add_argument( + '-p', '--print', action='store_true', help='Only print a requirements.txt to stdout') + parser.add_argument( + '-u', '--user', action='store_true', help='Install with pip as --user') + return parser.parse_args() + + +def main(): + args = parse_args() + project_table = parse_toml(read_file(args.input))['project'] + optional_groups = project_table['optional-dependencies'] + excludes = args.exclude or [] + + deps = [] + if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group + deps.extend(project_table['dependencies']) + if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group + deps.extend(optional_groups['default']) + + def name(dependency): + return re.match(r'[\w-]+', dependency)[0].lower() + + target_map = {name(dep): dep for dep in deps} + + for include in filter(None, map(optional_groups.get, args.include or [])): + target_map.update(zip(map(name, include), include)) + + for exclude in map(name, excludes): + target_map.pop(exclude, None) + + targets = list(target_map.values()) + + if args.print: + for target in targets: + print(target) + return + + pip_args = [sys.executable, '-m', 'pip', 'install', '-U'] + if args.user: + pip_args.append('--user') + pip_args.extend(targets) + + return subprocess.call(pip_args) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py new file mode 100644 index 0000000..6f52165 --- /dev/null +++ b/devscripts/lazy_load_template.py @@ -0,0 +1,39 @@ +import importlib +import random +import re + +from ..utils import ( + age_restricted, + bug_reports_message, + classproperty, + variadic, + write_string, +) + +# These bloat the lazy_extractors, so allow them to passthrough silently +ALLOWED_CLASSMETHODS = {'extract_from_webpage', 'get_testcases', 'get_webpage_testcases'} +_WARNED = False + + +class LazyLoadMetaClass(type): + def __getattr__(cls, name): + global _WARNED + if ('_real_class' not in cls.__dict__ + and name not in ALLOWED_CLASSMETHODS and not _WARNED): + _WARNED = True + write_string('WARNING: Falling back to normal extractor since lazy extractor ' + f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n') + return getattr(cls.real_class, name) + + +class LazyLoadExtractor(metaclass=LazyLoadMetaClass): + @classproperty + def real_class(cls): + if '_real_class' not in cls.__dict__: + cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__) + return cls._real_class + + def __new__(cls, *args, **kwargs): + instance = cls.real_class.__new__(cls.real_class) + instance.__init__(*args, **kwargs) + return instance diff --git a/devscripts/logo.ico b/devscripts/logo.ico Binary files differnew file mode 100644 index 0000000..5503a43 --- /dev/null +++ b/devscripts/logo.ico diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py new file mode 100644 index 0000000..faab5fa --- /dev/null +++ b/devscripts/make_changelog.py @@ -0,0 +1,503 @@ +from __future__ import annotations + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import enum +import itertools +import json +import logging +import re +from collections import defaultdict +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path + +from devscripts.utils import read_file, run_process, write_file + +BASE_URL = 'https://github.com' +LOCATION_PATH = Path(__file__).parent +HASH_LENGTH = 7 + +logger = logging.getLogger(__name__) + + +class CommitGroup(enum.Enum): + PRIORITY = 'Important' + CORE = 'Core' + EXTRACTOR = 'Extractor' + DOWNLOADER = 'Downloader' + POSTPROCESSOR = 'Postprocessor' + NETWORKING = 'Networking' + MISC = 'Misc.' + + @classmethod + @lru_cache + def subgroup_lookup(cls): + return { + name: group + for group, names in { + cls.MISC: { + 'build', + 'ci', + 'cleanup', + 'devscripts', + 'docs', + 'test', + }, + cls.NETWORKING: { + 'rh', + }, + }.items() + for name in names + } + + @classmethod + @lru_cache + def group_lookup(cls): + result = { + 'fd': cls.DOWNLOADER, + 'ie': cls.EXTRACTOR, + 'pp': cls.POSTPROCESSOR, + 'upstream': cls.CORE, + } + result.update({item.name.lower(): item for item in iter(cls)}) + return result + + @classmethod + def get(cls, value: str) -> tuple[CommitGroup | None, str | None]: + group, _, subgroup = (group.strip().lower() for group in value.partition('/')) + + result = cls.group_lookup().get(group) + if not result: + if subgroup: + return None, value + subgroup = group + result = cls.subgroup_lookup().get(subgroup) + + return result, subgroup or None + + +@dataclass +class Commit: + hash: str | None + short: str + authors: list[str] + + def __str__(self): + result = f'{self.short!r}' + + if self.hash: + result += f' ({self.hash[:HASH_LENGTH]})' + + if self.authors: + authors = ', '.join(self.authors) + result += f' by {authors}' + + return result + + +@dataclass +class CommitInfo: + details: str | None + sub_details: tuple[str, ...] + message: str + issues: list[str] + commit: Commit + fixes: list[Commit] + + def key(self): + return ((self.details or '').lower(), self.sub_details, self.message) + + +def unique(items): + return sorted({item.strip().lower(): item for item in items if item}.values()) + + +class Changelog: + MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE) + ALWAYS_SHOWN = (CommitGroup.PRIORITY,) + + def __init__(self, groups, repo, collapsible=False): + self._groups = groups + self._repo = repo + self._collapsible = collapsible + + def __str__(self): + return '\n'.join(self._format_groups(self._groups)).replace('\t', ' ') + + def _format_groups(self, groups): + first = True + for item in CommitGroup: + if self._collapsible and item not in self.ALWAYS_SHOWN and first: + first = False + yield '\n<details><summary><h3>Changelog</h3></summary>\n' + + group = groups[item] + if group: + yield self.format_module(item.value, group) + + if self._collapsible: + yield '\n</details>' + + def format_module(self, name, group): + result = f'\n#### {name} changes\n' if name else '\n' + return result + '\n'.join(self._format_group(group)) + + def _format_group(self, group): + sorted_group = sorted(group, key=CommitInfo.key) + detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower()) + for _, items in detail_groups: + items = list(items) + details = items[0].details + + if details == 'cleanup': + items = self._prepare_cleanup_misc_items(items) + + prefix = '-' + if details: + if len(items) == 1: + prefix = f'- **{details}**:' + else: + yield f'- **{details}**' + prefix = '\t-' + + sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details))) + for sub_details, entries in sub_detail_groups: + if not sub_details: + for entry in entries: + yield f'{prefix} {self.format_single_change(entry)}' + continue + + entries = list(entries) + sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}' + if len(entries) == 1: + yield f'{sub_prefix}: {self.format_single_change(entries[0])}' + continue + + yield sub_prefix + for entry in entries: + yield f'\t{prefix} {self.format_single_change(entry)}' + + def _prepare_cleanup_misc_items(self, items): + cleanup_misc_items = defaultdict(list) + sorted_items = [] + for item in items: + if self.MISC_RE.search(item.message): + cleanup_misc_items[tuple(item.commit.authors)].append(item) + else: + sorted_items.append(item) + + for commit_infos in cleanup_misc_items.values(): + sorted_items.append(CommitInfo( + 'cleanup', ('Miscellaneous',), ', '.join( + self._format_message_link(None, info.commit.hash) + for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')), + [], Commit(None, '', commit_infos[0].commit.authors), [])) + + return sorted_items + + def format_single_change(self, info: CommitInfo): + message, sep, rest = info.message.partition('\n') + if '[' not in message: + # If the message doesn't already contain markdown links, try to add a link to the commit + message = self._format_message_link(message, info.commit.hash) + + if info.issues: + message = f'{message} ({self._format_issues(info.issues)})' + + if info.commit.authors: + message = f'{message} by {self._format_authors(info.commit.authors)}' + + if info.fixes: + fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes) + + authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold) + if authors != info.commit.authors: + fix_message = f'{fix_message} by {self._format_authors(authors)}' + + message = f'{message} (With fixes in {fix_message})' + + return message if not sep else f'{message}{sep}{rest}' + + def _format_message_link(self, message, hash): + assert message or hash, 'Improperly defined commit message or override' + message = message if message else hash[:HASH_LENGTH] + return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message + + def _format_issues(self, issues): + return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues) + + @staticmethod + def _format_authors(authors): + return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors) + + @property + def repo_url(self): + return f'{BASE_URL}/{self._repo}' + + +class CommitRange: + COMMAND = 'git' + COMMIT_SEPARATOR = '-----' + + AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE) + MESSAGE_RE = re.compile(r''' + (?:\[(?P<prefix>[^\]]+)\]\ )? + (?:(?P<sub_details>`?[\w.-]+`?): )? + (?P<message>.+?) + (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))? + ''', re.VERBOSE | re.DOTALL) + EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) + REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})') + FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})') + UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') + + def __init__(self, start, end, default_author=None): + self._start, self._end = start, end + self._commits, self._fixes = self._get_commits_and_fixes(default_author) + self._commits_added = [] + + def __iter__(self): + return iter(itertools.chain(self._commits.values(), self._commits_added)) + + def __len__(self): + return len(self._commits) + len(self._commits_added) + + def __contains__(self, commit): + if isinstance(commit, Commit): + if not commit.hash: + return False + commit = commit.hash + + return commit in self._commits + + def _get_commits_and_fixes(self, default_author): + result = run_process( + self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}', + f'{self._start}..{self._end}' if self._start else self._end).stdout + + commits, reverts = {}, {} + fixes = defaultdict(list) + lines = iter(result.splitlines(False)) + for i, commit_hash in enumerate(lines): + short = next(lines) + skip = short.startswith('Release ') or short == '[version] update' + + authors = [default_author] if default_author else [] + for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): + match = self.AUTHOR_INDICATOR_RE.match(line) + if match: + authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) + + commit = Commit(commit_hash, short, authors) + if skip and (self._start or not i): + logger.debug(f'Skipped commit: {commit}') + continue + elif skip: + logger.debug(f'Reached Release commit, breaking: {commit}') + break + + revert_match = self.REVERT_RE.fullmatch(commit.short) + if revert_match: + reverts[revert_match.group(1)] = commit + continue + + fix_match = self.FIXES_RE.search(commit.short) + if fix_match: + commitish = fix_match.group(1) + fixes[commitish].append(commit) + + commits[commit.hash] = commit + + for commitish, revert_commit in reverts.items(): + reverted = commits.pop(commitish, None) + if reverted: + logger.debug(f'{commitish} fully reverted {reverted}') + else: + commits[revert_commit.hash] = revert_commit + + for commitish, fix_commits in fixes.items(): + if commitish in commits: + hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits) + logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}') + for fix_commit in fix_commits: + del commits[fix_commit.hash] + else: + logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}') + + return commits, fixes + + def apply_overrides(self, overrides): + for override in overrides: + when = override.get('when') + if when and when not in self and when != self._start: + logger.debug(f'Ignored {when!r} override') + continue + + override_hash = override.get('hash') or when + if override['action'] == 'add': + commit = Commit(override.get('hash'), override['short'], override.get('authors') or []) + logger.info(f'ADD {commit}') + self._commits_added.append(commit) + + elif override['action'] == 'remove': + if override_hash in self._commits: + logger.info(f'REMOVE {self._commits[override_hash]}') + del self._commits[override_hash] + + elif override['action'] == 'change': + if override_hash not in self._commits: + continue + commit = Commit(override_hash, override['short'], override.get('authors') or []) + logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}') + self._commits[commit.hash] = commit + + self._commits = {key: value for key, value in reversed(self._commits.items())} + + def groups(self): + group_dict = defaultdict(list) + for commit in self: + upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short) + if upstream_re: + commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}' + + match = self.MESSAGE_RE.fullmatch(commit.short) + if not match: + logger.error(f'Error parsing short commit message: {commit.short!r}') + continue + + prefix, sub_details_alt, message, issues = match.groups() + issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else [] + + if prefix: + groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(','))) + group = next(iter(filter(None, groups)), None) + details = ', '.join(unique(details)) + sub_details = list(itertools.chain.from_iterable(sub_details)) + else: + group = CommitGroup.CORE + details = None + sub_details = [] + + if sub_details_alt: + sub_details.append(sub_details_alt) + sub_details = tuple(unique(sub_details)) + + if not group: + if self.EXTRACTOR_INDICATOR_RE.search(commit.short): + group = CommitGroup.EXTRACTOR + logger.error(f'Assuming [ie] group for {commit.short!r}') + else: + group = CommitGroup.CORE + + commit_info = CommitInfo( + details, sub_details, message.strip(), + issues, commit, self._fixes[commit.hash]) + + logger.debug(f'Resolved {commit.short!r} to {commit_info!r}') + group_dict[group].append(commit_info) + + return group_dict + + @staticmethod + def details_from_prefix(prefix): + if not prefix: + return CommitGroup.CORE, None, () + + prefix, *sub_details = prefix.split(':') + + group, details = CommitGroup.get(prefix) + if group is CommitGroup.PRIORITY and details: + details = details.partition('/')[2].strip() + + if details and '/' in details: + logger.error(f'Prefix is overnested, using first part: {prefix}') + details = details.partition('/')[0].strip() + + if details == 'common': + details = None + elif group is CommitGroup.NETWORKING and details == 'rh': + details = 'Request Handler' + + return group, details, sub_details + + +def get_new_contributors(contributors_path, commits): + contributors = set() + if contributors_path.exists(): + for line in read_file(contributors_path).splitlines(): + author, _, _ = line.strip().partition(' (') + authors = author.split('/') + contributors.update(map(str.casefold, authors)) + + new_contributors = set() + for commit in commits: + for author in commit.authors: + author_folded = author.casefold() + if author_folded not in contributors: + contributors.add(author_folded) + new_contributors.add(author) + + return sorted(new_contributors, key=str.casefold) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser( + description='Create a changelog markdown from a git commit range') + parser.add_argument( + 'commitish', default='HEAD', nargs='?', + help='The commitish to create the range from (default: %(default)s)') + parser.add_argument( + '-v', '--verbosity', action='count', default=0, + help='increase verbosity (can be used twice)') + parser.add_argument( + '-c', '--contributors', action='store_true', + help='update CONTRIBUTORS file (default: %(default)s)') + parser.add_argument( + '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS', + help='path to the CONTRIBUTORS file') + parser.add_argument( + '--no-override', action='store_true', + help='skip override json in commit generation (default: %(default)s)') + parser.add_argument( + '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json', + help='path to the changelog_override.json file') + parser.add_argument( + '--default-author', default='pukkandan', + help='the author to use without a author indicator (default: %(default)s)') + parser.add_argument( + '--repo', default='yt-dlp/yt-dlp', + help='the github repository to use for the operations (default: %(default)s)') + parser.add_argument( + '--collapsible', action='store_true', + help='make changelog collapsible (default: %(default)s)') + args = parser.parse_args() + + logging.basicConfig( + datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}', + level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr) + + commits = CommitRange(None, args.commitish, args.default_author) + + if not args.no_override: + if args.override_path.exists(): + overrides = json.loads(read_file(args.override_path)) + commits.apply_overrides(overrides) + else: + logger.warning(f'File {args.override_path.as_posix()} does not exist') + + logger.info(f'Loaded {len(commits)} commits') + + new_contributors = get_new_contributors(args.contributors_path, commits) + if new_contributors: + if args.contributors: + write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a') + logger.info(f'New contributors: {", ".join(new_contributors)}') + + print(Changelog(commits.groups(), args.repo, args.collapsible)) diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py new file mode 100755 index 0000000..a06f8a6 --- /dev/null +++ b/devscripts/make_contributing.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +import optparse +import re + + +def main(): + return # This is unused in yt-dlp + + parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') + options, args = parser.parse_args() + if len(args) != 2: + parser.error('Expected an input and an output filename') + + infile, outfile = args + + with open(infile, encoding='utf-8') as inf: + readme = inf.read() + + bug_text = re.search( + r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1) + dev_text = re.search( + r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING yt-dlp', readme).group(1) + + out = bug_text + dev_text + + with open(outfile, 'w', encoding='utf-8') as outf: + outf.write(out) + + +if __name__ == '__main__': + main() diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py new file mode 100644 index 0000000..a5d59f3 --- /dev/null +++ b/devscripts/make_issue_template.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import re + +from devscripts.utils import get_filename_args, read_file, write_file + +VERBOSE_TMPL = ''' + - type: checkboxes + id: verbose + attributes: + label: Provide verbose output that clearly demonstrates the problem + options: + - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`) + required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false + - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below + required: true + - type: textarea + id: log + attributes: + label: Complete Verbose Output + description: | + It should start like this: + placeholder: | + [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 + [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) + [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 + [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 + [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 + [debug] Proxy map: {} + [debug] Request Handlers: urllib, requests + [debug] Loaded 1893 extractors + [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest + yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) + [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc + <more lines> + render: shell + validations: + required: true +'''.strip() + +NO_SKIP = ''' + - type: checkboxes + attributes: + label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE + description: Fill all fields even if you think it is irrelevant for the issue + options: + - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\\* field + required: true +'''.strip() + + +def main(): + fields = {'no_skip': NO_SKIP} + fields['verbose'] = VERBOSE_TMPL % fields + fields['verbose_optional'] = re.sub(r'(\n\s+validations:)?\n\s+required: true', '', fields['verbose']) + + infile, outfile = get_filename_args(has_infile=True) + write_file(outfile, read_file(infile) % fields) + + +if __name__ == '__main__': + main() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py new file mode 100644 index 0000000..d74ea20 --- /dev/null +++ b/devscripts/make_lazy_extractors.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import shutil +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from inspect import getsource + +from devscripts.utils import get_filename_args, read_file, write_file + +NO_ATTR = object() +STATIC_CLASS_PROPERTIES = [ + 'IE_NAME', '_ENABLED', '_VALID_URL', # Used for URL matching + '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY', # Used for --extractor-descriptions + 'age_limit', # Used for --age-limit (evaluated) + '_RETURN_TYPE', # Accessed in CLI only with instance (evaluated) +] +CLASS_METHODS = [ + 'ie_key', 'suitable', '_match_valid_url', # Used for URL matching + 'working', 'get_temp_id', '_match_id', # Accessed just before instance creation + 'description', # Used for --extractor-descriptions + 'is_suitable', # Used for --age-limit + 'supports_login', 'is_single_video', # Accessed in CLI only with instance +] +IE_TEMPLATE = ''' +class {name}({bases}): + _module = {module!r} +''' +MODULE_TEMPLATE = read_file('devscripts/lazy_load_template.py') + + +def main(): + lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py') + if os.path.exists(lazy_extractors_filename): + os.remove(lazy_extractors_filename) + + _ALL_CLASSES = get_all_ies() # Must be before import + + import yt_dlp.plugins + from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor + + # Filter out plugins + _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')] + + DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR}) + module_src = '\n'.join(( + MODULE_TEMPLATE, + ' _module = None', + *extra_ie_code(DummyInfoExtractor), + '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n', + *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor), + )) + + write_file(lazy_extractors_filename, f'{module_src}\n') + + +def get_all_ies(): + PLUGINS_DIRNAME = 'ytdlp_plugins' + BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked' + if os.path.exists(PLUGINS_DIRNAME): + # os.rename cannot be used, e.g. in Docker. See https://github.com/yt-dlp/yt-dlp/pull/4958 + shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME) + try: + from yt_dlp.extractor.extractors import _ALL_CLASSES + finally: + if os.path.exists(BLOCKED_DIRNAME): + shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME) + return _ALL_CLASSES + + +def extra_ie_code(ie, base=None): + for var in STATIC_CLASS_PROPERTIES: + val = getattr(ie, var) + if val != (getattr(base, var) if base else NO_ATTR): + yield f' {var} = {val!r}' + yield '' + + for name in CLASS_METHODS: + f = getattr(ie, name) + if not base or f.__func__ != getattr(base, name).__func__: + yield getsource(f) + + +def build_ies(ies, bases, attr_base): + names = [] + for ie in sort_ies(ies, bases): + yield build_lazy_ie(ie, ie.__name__, attr_base) + if ie in ies: + names.append(ie.__name__) + + yield f'\n_ALL_CLASSES = [{", ".join(names)}]' + + +def sort_ies(ies, ignored_bases): + """find the correct sorting and add the required base classes so that subclasses can be correctly created""" + classes, returned_classes = ies[:-1], set() + assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE' + while classes: + for c in classes[:]: + bases = set(c.__bases__) - {object, *ignored_bases} + restart = False + for b in sorted(bases, key=lambda x: x.__name__): + if b not in classes and b not in returned_classes: + assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE' + classes.insert(0, b) + restart = True + if restart: + break + if bases <= returned_classes: + yield c + returned_classes.add(c) + classes.remove(c) + break + yield ies[-1] + + +def build_lazy_ie(ie, name, attr_base): + bases = ', '.join({ + 'InfoExtractor': 'LazyLoadExtractor', + 'SearchInfoExtractor': 'LazyLoadSearchExtractor', + }.get(base.__name__, base.__name__) for base in ie.__bases__) + + s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases) + return s + '\n'.join(extra_ie_code(ie, attr_base)) + + +if __name__ == '__main__': + main() diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py new file mode 100755 index 0000000..2270b31 --- /dev/null +++ b/devscripts/make_readme.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + +""" +yt-dlp --help | make_readme.py +This must be run in a console of correct width +""" + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import functools +import re + +from devscripts.utils import read_file, write_file + +README_FILE = 'README.md' + +OPTIONS_START = 'General Options:' +OPTIONS_END = 'CONFIGURATION' +EPILOG_START = 'See full documentation' +ALLOWED_OVERSHOOT = 2 + +DISABLE_PATCH = object() + + +def take_section(text, start=None, end=None, *, shift=0): + return text[ + text.index(start) + shift if start else None: + text.index(end) + shift if end else None + ] + + +def apply_patch(text, patch): + return text if patch[0] is DISABLE_PATCH else re.sub(*patch, text) + + +options = take_section(sys.stdin.read(), f'\n {OPTIONS_START}', f'\n{EPILOG_START}', shift=1) + +max_width = max(map(len, options.split('\n'))) +switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group()) +delim = f'\n{" " * switch_col_width}' + +PATCHES = ( + ( # Standardize `--update` message + r'(?m)^( -U, --update\s+).+(\n \s.+)*$', + r'\1Update this program to the latest version', + ), + ( # Headings + r'(?m)^ (\w.+\n)( (?=\w))?', + r'## \1' + ), + ( # Fixup `--date` formatting + rf'(?m)( --date DATE.+({delim}[^\[]+)*)\[.+({delim}.+)*$', + (rf'\1[now|today|yesterday][-N[day|week|month|year]].{delim}' + f'E.g. "--date today-2weeks" downloads only{delim}' + 'videos uploaded on the same day two weeks ago'), + ), + ( # Do not split URLs + rf'({delim[:-1]})? (?P<label>\[\S+\] )?(?P<url>https?({delim})?:({delim})?/({delim})?/(({delim})?\S+)+)\s', + lambda mobj: ''.join((delim, mobj.group('label') or '', re.sub(r'\s+', '', mobj.group('url')), '\n')) + ), + ( # Do not split "words" + rf'(?m)({delim}\S+)+$', + lambda mobj: ''.join((delim, mobj.group(0).replace(delim, ''))) + ), + ( # Allow overshooting last line + rf'(?m)^(?P<prev>.+)${delim}(?P<current>.+)$(?!{delim})', + lambda mobj: (mobj.group().replace(delim, ' ') + if len(mobj.group()) - len(delim) + 1 <= max_width + ALLOWED_OVERSHOOT + else mobj.group()) + ), + ( # Avoid newline when a space is available b/w switch and description + DISABLE_PATCH, # This creates issues with prepare_manpage + r'(?m)^(\s{4}-.{%d})(%s)' % (switch_col_width - 6, delim), + r'\1 ' + ), + ( # Replace brackets with a Markdown link + r'SponsorBlock API \((http.+)\)', + r'[SponsorBlock API](\1)' + ), +) + +readme = read_file(README_FILE) + +write_file(README_FILE, ''.join(( + take_section(readme, end=f'## {OPTIONS_START}'), + functools.reduce(apply_patch, PATCHES, options), + take_section(readme, f'# {OPTIONS_END}'), +))) diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py new file mode 100644 index 0000000..01548ef --- /dev/null +++ b/devscripts/make_supportedsites.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from devscripts.utils import get_filename_args, write_file +from yt_dlp.extractor import list_extractor_classes + + +def main(): + out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False) + write_file(get_filename_args(), f'# Supported sites\n{out}\n') + + +if __name__ == '__main__': + main() diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py new file mode 100644 index 0000000..9b12e71 --- /dev/null +++ b/devscripts/prepare_manpage.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import os.path +import re + +from devscripts.utils import ( + compose_functions, + get_filename_args, + read_file, + write_file, +) + +ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +README_FILE = os.path.join(ROOT_DIR, 'README.md') + +PREFIX = r'''%yt-dlp(1) + +# NAME + +yt\-dlp \- A youtube-dl fork with additional features and patches + +# SYNOPSIS + +**yt-dlp** \[OPTIONS\] URL [URL...] + +# DESCRIPTION + +''' + + +def filter_excluded_sections(readme): + EXCLUDED_SECTION_BEGIN_STRING = re.escape('<!-- MANPAGE: BEGIN EXCLUDED SECTION -->') + EXCLUDED_SECTION_END_STRING = re.escape('<!-- MANPAGE: END EXCLUDED SECTION -->') + return re.sub( + rf'(?s){EXCLUDED_SECTION_BEGIN_STRING}.+?{EXCLUDED_SECTION_END_STRING}\n', + '', readme) + + +def move_sections(readme): + MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->' + sections = re.findall(r'(?m)^%s$' % ( + re.escape(MOVE_TAG_TEMPLATE).replace(r'\%', '%') % '(.+)'), readme) + + for section_name in sections: + move_tag = MOVE_TAG_TEMPLATE % section_name + if readme.count(move_tag) > 1: + raise Exception(f'There is more than one occurrence of "{move_tag}". This is unexpected') + + sections = re.findall(rf'(?sm)(^# {re.escape(section_name)}.+?)(?=^# )', readme) + if len(sections) < 1: + raise Exception(f'The section {section_name} does not exist') + elif len(sections) > 1: + raise Exception(f'There are multiple occurrences of section {section_name}, this is unhandled') + + readme = readme.replace(sections[0], '', 1).replace(move_tag, sections[0], 1) + return readme + + +def filter_options(readme): + section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0) + options = '# OPTIONS\n' + for line in section.split('\n')[1:]: + mobj = re.fullmatch(r'''(?x) + \s{4}(?P<opt>-(?:,\s|[^\s])+) + (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))? + (\s{2,}(?P<desc>.+))? + ''', line) + if not mobj: + options += f'{line.lstrip()}\n' + continue + option, metavar, description = mobj.group('opt', 'meta', 'desc') + + # Pandoc's definition_lists. See http://pandoc.org/README.html + option = f'{option} *{metavar}*' if metavar else option + description = f'{description}\n' if description else '' + options += f'\n{option}\n: {description}' + continue + + return readme.replace(section, options, 1) + + +TRANSFORM = compose_functions(filter_excluded_sections, move_sections, filter_options) + + +def main(): + write_file(get_filename_args(), PREFIX + TRANSFORM(read_file(README_FILE))) + + +if __name__ == '__main__': + main() diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat new file mode 100644 index 0000000..57b1f4b --- /dev/null +++ b/devscripts/run_tests.bat @@ -0,0 +1,4 @@ +@echo off + +>&2 echo run_tests.bat is deprecated. Please use `devscripts/run_tests.py` instead +python %~dp0run_tests.py %~1 diff --git a/devscripts/run_tests.py b/devscripts/run_tests.py new file mode 100755 index 0000000..6d638a9 --- /dev/null +++ b/devscripts/run_tests.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 + +import argparse +import functools +import os +import re +import subprocess +import sys +from pathlib import Path + + +fix_test_name = functools.partial(re.compile(r'IE(_all|_\d+)?$').sub, r'\1') + + +def parse_args(): + parser = argparse.ArgumentParser(description='Run selected yt-dlp tests') + parser.add_argument( + 'test', help='a extractor tests, or one of "core" or "download"', nargs='*') + parser.add_argument( + '-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION') + return parser.parse_args() + + +def run_tests(*tests, pattern=None, ci=False): + run_core = 'core' in tests or (not pattern and not tests) + run_download = 'download' in tests + tests = list(map(fix_test_name, tests)) + + arguments = ['pytest', '-Werror', '--tb=short'] + if ci: + arguments.append('--color=yes') + if run_core: + arguments.extend(['-m', 'not download']) + elif run_download: + arguments.extend(['-m', 'download']) + elif pattern: + arguments.extend(['-k', pattern]) + else: + arguments.extend( + f'test/test_download.py::TestDownload::test_{test}' for test in tests) + + print(f'Running {arguments}', flush=True) + try: + return subprocess.call(arguments) + except FileNotFoundError: + pass + + arguments = [sys.executable, '-Werror', '-m', 'unittest'] + if run_core: + print('"pytest" needs to be installed to run core tests', file=sys.stderr, flush=True) + return 1 + elif run_download: + arguments.append('test.test_download') + elif pattern: + arguments.extend(['-k', pattern]) + else: + arguments.extend( + f'test.test_download.TestDownload.test_{test}' for test in tests) + + print(f'Running {arguments}', flush=True) + return subprocess.call(arguments) + + +if __name__ == '__main__': + try: + args = parse_args() + + os.chdir(Path(__file__).parent.parent) + sys.exit(run_tests(*args.test, pattern=args.k, ci=bool(os.getenv('CI')))) + except KeyboardInterrupt: + pass diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh new file mode 100755 index 0000000..123ceb1 --- /dev/null +++ b/devscripts/run_tests.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env sh + +>&2 echo 'run_tests.sh is deprecated. Please use `devscripts/run_tests.py` instead' +python3 devscripts/run_tests.py "$1" diff --git a/devscripts/set-variant.py b/devscripts/set-variant.py new file mode 100644 index 0000000..10341e7 --- /dev/null +++ b/devscripts/set-variant.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import argparse +import functools +import re + +from devscripts.utils import compose_functions, read_file, write_file + +VERSION_FILE = 'yt_dlp/version.py' + + +def parse_options(): + parser = argparse.ArgumentParser(description='Set the build variant of the package') + parser.add_argument('variant', help='Name of the variant') + parser.add_argument('-M', '--update-message', default=None, help='Message to show in -U') + return parser.parse_args() + + +def property_setter(name, value): + return functools.partial(re.sub, rf'(?m)^{name}\s*=\s*.+$', f'{name} = {value!r}') + + +opts = parse_options() +transform = compose_functions( + property_setter('VARIANT', opts.variant), + property_setter('UPDATE_HINT', opts.update_message) +) + +write_file(VERSION_FILE, transform(read_file(VERSION_FILE))) diff --git a/devscripts/tomlparse.py b/devscripts/tomlparse.py new file mode 100755 index 0000000..85ac4ee --- /dev/null +++ b/devscripts/tomlparse.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 + +""" +Simple parser for spec compliant toml files + +A simple toml parser for files that comply with the spec. +Should only be used to parse `pyproject.toml` for `install_deps.py`. + +IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED! +""" + +from __future__ import annotations + +import datetime +import json +import re + +WS = r'(?:[\ \t]*)' +STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'') +SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+') +KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*') +EQUALS_RE = re.compile(rf'={WS}') +WS_RE = re.compile(WS) + +_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)' +EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE) + +LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*') +LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+') + + +def parse_key(value: str): + for match in SINGLE_KEY_RE.finditer(value): + if match[0][0] == '"': + yield json.loads(match[0]) + elif match[0][0] == '\'': + yield match[0][1:-1] + else: + yield match[0] + + +def get_target(root: dict, paths: list[str], is_list=False): + target = root + + for index, key in enumerate(paths, 1): + use_list = is_list and index == len(paths) + result = target.get(key) + if result is None: + result = [] if use_list else {} + target[key] = result + + if isinstance(result, dict): + target = result + elif use_list: + target = {} + result.append(target) + else: + target = result[-1] + + assert isinstance(target, dict) + return target + + +def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern): + index += 1 + + if match := ws_re.match(data, index): + index = match.end() + + while data[index] != end: + index = yield True, index + + if match := ws_re.match(data, index): + index = match.end() + + if data[index] == ',': + index += 1 + + if match := ws_re.match(data, index): + index = match.end() + + assert data[index] == end + yield False, index + 1 + + +def parse_value(data: str, index: int): + if data[index] == '[': + result = [] + + indices = parse_enclosed(data, index, ']', LIST_WS_RE) + valid, index = next(indices) + while valid: + index, value = parse_value(data, index) + result.append(value) + valid, index = indices.send(index) + + return index, result + + if data[index] == '{': + result = {} + + indices = parse_enclosed(data, index, '}', WS_RE) + valid, index = next(indices) + while valid: + valid, index = indices.send(parse_kv_pair(data, index, result)) + + return index, result + + if match := STRING_RE.match(data, index): + return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1] + + match = LEFTOVER_VALUE_RE.match(data, index) + assert match + value = match[0].strip() + for func in [ + int, + float, + datetime.time.fromisoformat, + datetime.date.fromisoformat, + datetime.datetime.fromisoformat, + {'true': True, 'false': False}.get, + ]: + try: + value = func(value) + break + except Exception: + pass + + return match.end(), value + + +def parse_kv_pair(data: str, index: int, target: dict): + match = KEY_RE.match(data, index) + if not match: + return None + + *keys, key = parse_key(match[0]) + + match = EQUALS_RE.match(data, match.end()) + assert match + index = match.end() + + index, value = parse_value(data, index) + get_target(target, keys)[key] = value + return index + + +def parse_toml(data: str): + root = {} + target = root + + index = 0 + while True: + match = EXPRESSION_RE.search(data, index) + if not match: + break + + if match.group('subtable'): + index = match.end() + path, is_list = match.group('path', 'is_list') + target = get_target(root, list(parse_key(path)), bool(is_list)) + continue + + index = parse_kv_pair(data, match.start(), target) + assert index is not None + + return root + + +def main(): + import argparse + from pathlib import Path + + parser = argparse.ArgumentParser() + parser.add_argument('infile', type=Path, help='The TOML file to read as input') + args = parser.parse_args() + + with args.infile.open('r', encoding='utf-8') as file: + data = file.read() + + def default(obj): + if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)): + return obj.isoformat() + + print(json.dumps(parse_toml(data), default=default)) + + +if __name__ == '__main__': + main() diff --git a/devscripts/update-version.py b/devscripts/update-version.py new file mode 100644 index 0000000..da54a6a --- /dev/null +++ b/devscripts/update-version.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import argparse +import contextlib +import sys +from datetime import datetime, timezone + +from devscripts.utils import read_version, run_process, write_file + + +def get_new_version(version, revision): + if not version: + version = datetime.now(timezone.utc).strftime('%Y.%m.%d') + + if revision: + assert revision.isdecimal(), 'Revision must be a number' + else: + old_version = read_version().split('.') + if version.split('.') == old_version[:3]: + revision = str(int((old_version + [0])[3]) + 1) + + return f'{version}.{revision}' if revision else version + + +def get_git_head(): + with contextlib.suppress(Exception): + return run_process('git', 'rev-parse', 'HEAD').stdout.strip() + + +VERSION_TEMPLATE = '''\ +# Autogenerated by devscripts/update-version.py + +__version__ = {version!r} + +RELEASE_GIT_HEAD = {git_head!r} + +VARIANT = None + +UPDATE_HINT = None + +CHANNEL = {channel!r} + +ORIGIN = {origin!r} + +_pkg_version = {package_version!r} +''' + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Update the version.py file') + parser.add_argument( + '-c', '--channel', default='stable', + help='Select update channel (default: %(default)s)') + parser.add_argument( + '-r', '--origin', default='local', + help='Select origin/repository (default: %(default)s)') + parser.add_argument( + '-s', '--suffix', default='', + help='Add an alphanumeric suffix to the package version, e.g. "dev"') + parser.add_argument( + '-o', '--output', default='yt_dlp/version.py', + help='The output file to write to (default: %(default)s)') + parser.add_argument( + 'version', nargs='?', default=None, + help='A version or revision to use instead of generating one') + args = parser.parse_args() + + git_head = get_git_head() + version = ( + args.version if args.version and '.' in args.version + else get_new_version(None, args.version)) + write_file(args.output, VERSION_TEMPLATE.format( + version=version, git_head=git_head, channel=args.channel, origin=args.origin, + package_version=f'{version}{args.suffix}')) + + print(f'version={version} ({args.channel}), head={git_head}') diff --git a/devscripts/utils.py b/devscripts/utils.py new file mode 100644 index 0000000..a952c9f --- /dev/null +++ b/devscripts/utils.py @@ -0,0 +1,47 @@ +import argparse +import functools +import subprocess + + +def read_file(fname): + with open(fname, encoding='utf-8') as f: + return f.read() + + +def write_file(fname, content, mode='w'): + with open(fname, mode, encoding='utf-8') as f: + return f.write(content) + + +def read_version(fname='yt_dlp/version.py', varname='__version__'): + """Get the version without importing the package""" + items = {} + exec(compile(read_file(fname), fname, 'exec'), items) + return items[varname] + + +def get_filename_args(has_infile=False, default_outfile=None): + parser = argparse.ArgumentParser() + if has_infile: + parser.add_argument('infile', help='Input file') + kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {} + parser.add_argument('outfile', **kwargs, help='Output file') + + opts = parser.parse_args() + if has_infile: + return opts.infile, opts.outfile + return opts.outfile + + +def compose_functions(*functions): + return lambda x: functools.reduce(lambda y, f: f(y), functions, x) + + +def run_process(*args, **kwargs): + kwargs.setdefault('text', True) + kwargs.setdefault('check', True) + kwargs.setdefault('capture_output', True) + if kwargs['text']: + kwargs.setdefault('encoding', 'utf-8') + kwargs.setdefault('errors', 'replace') + return subprocess.run(args, **kwargs) diff --git a/devscripts/zsh-completion.in b/devscripts/zsh-completion.in new file mode 100644 index 0000000..9117d33 --- /dev/null +++ b/devscripts/zsh-completion.in @@ -0,0 +1,30 @@ +#compdef yt-dlp + +__yt_dlp() { + local curcontext="$curcontext" fileopts diropts cur prev + typeset -A opt_args + fileopts="{{fileopts}}" + diropts="{{diropts}}" + cur=$words[CURRENT] + case $cur in + :) + _arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)' + ;; + *) + prev=$words[CURRENT-1] + if [[ ${prev} =~ ${fileopts} ]]; then + _path_files + elif [[ ${prev} =~ ${diropts} ]]; then + _path_files -/ + elif [[ ${prev} == "--remux-video" ]]; then + _arguments '*: :(mp4 mkv)' + elif [[ ${prev} == "--recode-video" ]]; then + _arguments '*: :(mp4 flv ogg webm mkv)' + else + _arguments '*: :({{flags}})' + fi + ;; + esac +} + +__yt_dlp
\ No newline at end of file diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py new file mode 100755 index 0000000..267af5f --- /dev/null +++ b/devscripts/zsh-completion.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import yt_dlp + +ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp" +ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in" + + +def build_completion(opt_parser): + opts = [opt for group in opt_parser.option_groups + for opt in group.option_list] + opts_file = [opt for opt in opts if opt.metavar == "FILE"] + opts_dir = [opt for opt in opts if opt.metavar == "DIR"] + + fileopts = [] + for opt in opts_file: + if opt._short_opts: + fileopts.extend(opt._short_opts) + if opt._long_opts: + fileopts.extend(opt._long_opts) + + diropts = [] + for opt in opts_dir: + if opt._short_opts: + diropts.extend(opt._short_opts) + if opt._long_opts: + diropts.extend(opt._long_opts) + + flags = [opt.get_opt_string() for opt in opts] + + with open(ZSH_COMPLETION_TEMPLATE) as f: + template = f.read() + + template = template.replace("{{fileopts}}", "|".join(fileopts)) + template = template.replace("{{diropts}}", "|".join(diropts)) + template = template.replace("{{flags}}", " ".join(flags)) + + with open(ZSH_COMPLETION_FILE, "w") as f: + f.write(template) + + +parser = yt_dlp.parseOpts(ignore_config_files=True)[0] +build_completion(parser) |