diff options
Diffstat (limited to '')
104 files changed, 47425 insertions, 0 deletions
def _parse_deb_build_options(value: str) -> Mapping[str, Optional[str]]:
    """Parse a DEB_BUILD_OPTIONS style string into a mapping

    The string is split on whitespace.  A token containing "=" is split on
    its first "=" into a key and a value; a token without "=" maps to None.
    When a key occurs more than once, the last occurrence wins.

    :param value: The raw (possibly empty) option string
    :return: Mapping from option name to its value (None for flag-like options)
    """
    res: Dict[str, Optional[str]] = {}
    for kvish in value.split():
        # Dedicated local names keep the `value` parameter from being rebound
        # while we are still iterating over the tokens derived from it.
        key, sep, option_value = kvish.partition("=")
        res[key] = option_value if sep else None
    return res


class DebBuildOptionsAndProfiles:
    """Accessor to common environment related values

    >>> env = DebBuildOptionsAndProfiles(environ={'DEB_BUILD_PROFILES': 'noudeb nojava'})
    >>> 'noudeb' in env.deb_build_profiles
    True
    >>> 'nojava' in env.deb_build_profiles
    True
    >>> 'nopython' in env.deb_build_profiles
    False
    >>> sorted(env.deb_build_profiles)
    ['nojava', 'noudeb']
    """

    def __init__(self, *, environ: Optional[Mapping[str, str]] = None) -> None:
        """Provide a view of the options. Though consider using DebBuildOptionsAndProfiles.instance() instead

        The values are read once, at construction time; later changes to the
        mapping are not reflected.

        :param environ: Alternative to os.environ. Mostly useful for testing purposes
        """
        if environ is None:
            environ = os.environ
        self._deb_build_profiles = frozenset(
            environ.get("DEB_BUILD_PROFILES", "").split()
        )
        self._deb_build_options = _parse_deb_build_options(
            environ.get("DEB_BUILD_OPTIONS", "")
        )

    @staticmethod
    @lru_cache(1)
    def instance() -> "DebBuildOptionsAndProfiles":
        """Return the shared instance backed by os.environ

        The instance is created on the first call and cached, so it reflects
        the environment as it was at that point.
        """
        return DebBuildOptionsAndProfiles()

    @property
    def deb_build_profiles(self) -> FrozenSet[str]:
        """A set-like view of all build profiles active during the build

        >>> env = DebBuildOptionsAndProfiles(environ={'DEB_BUILD_PROFILES': 'noudeb nojava'})
        >>> 'noudeb' in env.deb_build_profiles
        True
        >>> 'nojava' in env.deb_build_profiles
        True
        >>> 'nopython' in env.deb_build_profiles
        False
        >>> sorted(env.deb_build_profiles)
        ['nojava', 'noudeb']

        """
        return self._deb_build_profiles

    @property
    def deb_build_options(self) -> Mapping[str, Optional[str]]:
        """A mapping view of all build options active during the build

        Options given without a value (such as "nostrip") map to None; options
        given as "key=value" map to the value as a string.

        >>> env = DebBuildOptionsAndProfiles(environ={'DEB_BUILD_OPTIONS': 'nostrip parallel=4'})
        >>> 'nostrip' in env.deb_build_options
        True
        >>> 'parallel' in env.deb_build_options
        True
        >>> 'noautodbgsym' in env.deb_build_options
        False
        >>> env.deb_build_options['nostrip'] is None
        True
        >>> env.deb_build_options['parallel']
        '4'
        >>> env.deb_build_options['noautodbgsym']
        Traceback (most recent call last):
            ...
        KeyError: 'noautodbgsym'
        >>> sorted(env.deb_build_options)
        ['nostrip', 'parallel']

        """
        return self._deb_build_options
"definitions" +MK_MANIFEST_VARIABLES = "variables" diff --git a/src/debputy/architecture_support.py b/src/debputy/architecture_support.py new file mode 100644 index 0000000..e190722 --- /dev/null +++ b/src/debputy/architecture_support.py @@ -0,0 +1,233 @@ +import os +import subprocess +from functools import lru_cache +from typing import Dict, Optional, Iterator, Tuple + + +class DpkgArchitectureBuildProcessValuesTable: + """Dict-like interface to dpkg-architecture values""" + + def __init__(self, *, mocked_answers: Optional[Dict[str, str]] = None) -> None: + """Create a new dpkg-architecture table; NO INSTANTIATION + + This object will be created for you; if you need a production instance + then call dpkg_architecture_table(). If you need a testing instance, + then call mock_arch_table(...) + + :param mocked_answers: Used for testing purposes. Do not use directly; + instead use mock_arch_table(...) to create the table you want. + """ + self._architecture_cache: Dict[str, str] = {} + self._has_run_dpkg_architecture = False + if mocked_answers is None: + self._architecture_cache = {} + self._respect_environ: bool = True + self._has_run_dpkg_architecture = False + else: + self._architecture_cache = mocked_answers + self._respect_environ = False + self._has_run_dpkg_architecture = True + + def __contains__(self, item: str) -> bool: + try: + self[item] + except KeyError: + return False + else: + return True + + def __getitem__(self, item: str) -> str: + if item not in self._architecture_cache: + if self._respect_environ: + value = os.environ.get(item) + if value is not None: + self._architecture_cache[item] = value + return value + if not self._has_run_dpkg_architecture: + self._load_dpkg_architecture_values() + # Fall through and look it up in the cache + return self._architecture_cache[item] + + def __iter__(self) -> Iterator[str]: + if not self._has_run_dpkg_architecture: + self._load_dpkg_architecture_values() + yield from self._architecture_cache + + @property + 
def current_host_arch(self) -> str: + """The architecture we are building for + + This is the architecture name you need if you are in doubt. + """ + return self["DEB_HOST_ARCH"] + + @property + def current_host_multiarch(self) -> str: + """The multi-arch path basename + + This is the multi-arch basename name you need if you are in doubt. It + goes here: + + "/usr/lib/{MA}".format(table.current_host_multiarch) + + """ + return self["DEB_HOST_MULTIARCH"] + + @property + def is_cross_compiling(self) -> bool: + """Whether we are cross-compiling + + This is defined as DEB_BUILD_GNU_TYPE != DEB_HOST_GNU_TYPE and + affects whether we can rely on being able to run the binaries + that are compiled. + """ + return self["DEB_BUILD_GNU_TYPE"] != self["DEB_HOST_GNU_TYPE"] + + def _load_dpkg_architecture_values(self) -> None: + env = dict(os.environ) + # For performance, disable dpkg's translation later + env["DPKG_NLS"] = "0" + kw_pairs = _parse_dpkg_arch_output( + subprocess.check_output( + ["dpkg-architecture"], + env=env, + ) + ) + for k, v in kw_pairs: + self._architecture_cache[k] = os.environ.get(k, v) + self._has_run_dpkg_architecture = True + + +def _parse_dpkg_arch_output(output: bytes) -> Iterator[Tuple[str, str]]: + text = output.decode("utf-8") + for line in text.splitlines(): + k, v = line.strip().split("=", 1) + yield k, v + + +def _rewrite(value: str, from_pattern: str, to_pattern: str) -> str: + assert value.startswith(from_pattern) + return to_pattern + value[len(from_pattern) :] + + +def faked_arch_table( + host_arch: str, + *, + build_arch: Optional[str] = None, + target_arch: Optional[str] = None, +) -> DpkgArchitectureBuildProcessValuesTable: + """Creates a mocked instance of DpkgArchitectureBuildProcessValuesTable + + + :param host_arch: The dpkg architecture to mock answers for. This affects + DEB_HOST_* values and defines the default for DEB_{BUILD,TARGET}_* if + not overridden. 
@lru_cache
def _faked_arch_tables(
    host_arch: str, build_arch: str, target_arch: str
) -> DpkgArchitectureBuildProcessValuesTable:
    """Build (and memoize) a mocked table for the given host/build/target architectures

    The DEB_HOST_*, DEB_BUILD_* and DEB_TARGET_* answers are derived from one or
    two dpkg-architecture invocations.  Since dpkg-architecture offers no option
    to set DEB_BUILD_* directly, those values are obtained by asking for the
    build architecture as if it was the host architecture and renaming the keys.
    """
    # Copy of the environment without any DEB_* variables (they might confuse
    # dpkg-architecture).
    query_env = {
        name: val for name, val in os.environ.items() if not name.startswith("DEB_")
    }
    # Set CC to /bin/true to avoid a warning from dpkg-architecture
    query_env["CC"] = "/bin/true"
    # For performance, disable dpkg's translation layer
    query_env["DPKG_NLS"] = "0"

    def _ask_dpkg(*arch_args: str):
        # The subprocess runs immediately; only the parsing of its output is lazy.
        return _parse_dpkg_arch_output(
            subprocess.check_output(
                ["dpkg-architecture", *arch_args],
                env=query_env,
            )
        )

    answers = {}

    if build_arch == host_arch:
        # Easy / common case - a single call is enough.  DEB_HOST_* doubles
        # as DEB_BUILD_*.
        for name, val in _ask_dpkg("-a", host_arch, "-A", target_arch):
            if name.startswith("DEB_TARGET_"):
                answers[name] = val
            elif name.startswith("DEB_HOST_"):
                answers[name] = val
                answers[_rewrite(name, "DEB_HOST_", "DEB_BUILD_")] = val
    elif host_arch != target_arch:
        # All three differ from the host's point of view; this takes two calls.
        # First the build arch (queried as "host" and renamed) ...
        for name, val in _ask_dpkg("-a", build_arch):
            if name.startswith("DEB_HOST_"):
                answers[_rewrite(name, "DEB_HOST_", "DEB_BUILD_")] = val
        # ... then the real host and target values.
        for name, val in _ask_dpkg("-a", host_arch, "-A", target_arch):
            if name.startswith(("DEB_HOST_", "DEB_TARGET_")):
                answers[name] = val
    else:
        # Remaining case: build_arch != host_arch while host_arch == target_arch.
        # A single call suffices with a bit of "clever" rewriting:
        #  - "-a" provides DEB_HOST_*, which is stored as DEB_BUILD_*
        #  - "-A" provides DEB_TARGET_*, which is stored as both DEB_TARGET_*
        #    and DEB_HOST_*
        for name, val in _ask_dpkg("-a", build_arch, "-A", target_arch):
            if name.startswith("DEB_HOST_"):
                answers[_rewrite(name, "DEB_HOST_", "DEB_BUILD_")] = val
            elif name.startswith("DEB_TARGET_"):
                answers[name] = val
                answers[_rewrite(name, "DEB_TARGET_", "DEB_HOST_")] = val

    return DpkgArchitectureBuildProcessValuesTable(mocked_answers=answers)
DpkgArchitectureBuildProcessValuesTable +from debputy.exceptions import PureVirtualPathError, TestPathWithNonExistentFSPathError +from debputy.intermediate_manifest import PathType +from debputy.manifest_parser.base_types import SymbolicMode, OctalMode, FileSystemMode +from debputy.manifest_parser.util import AttributePath +from debputy.packages import BinaryPackage +from debputy.path_matcher import ( + MATCH_ANYTHING, + MatchRule, + ExactFileSystemPath, + DirectoryBasedMatch, + MatchRuleType, + BasenameGlobMatch, +) +from debputy.substitution import Substitution +from debputy.types import VP +from debputy.util import _normalize_path, perl_module_dirs + +# Imported from dh_fixperms +_PERMISSION_NORMALIZATION_SOURCE_DEFINITION = "permission normalization" +attribute_path = AttributePath.builtin_path()[ + _PERMISSION_NORMALIZATION_SOURCE_DEFINITION +] +_STD_FILE_MODE = OctalMode(0o644) +_PATH_FILE_MODE = OctalMode(0o755) +_HAS_BIN_SHBANG_RE = re.compile(rb"^#!\s*/(?:usr/)?s?bin", re.ASCII) + + +class _UsrShareDocMatchRule(DirectoryBasedMatch): + def __init__(self) -> None: + super().__init__( + MatchRuleType.ANYTHING_BENEATH_DIR, + _normalize_path("usr/share/doc", with_prefix=True), + path_type=PathType.FILE, + ) + + def finditer(self, fs_root: VP, *, ignore_paths=None) -> Iterable[VP]: + doc_dir = fs_root.lookup(self._directory) + if doc_dir is None: + return + for path_in_doc_dir in doc_dir.iterdir: + if ignore_paths is not None and ignore_paths(path_in_doc_dir): + continue + if path_in_doc_dir.is_file: + yield path_in_doc_dir + for subpath in path_in_doc_dir.iterdir: + if subpath.name == "examples" and subpath.is_dir: + continue + if ignore_paths is not None: + yield from ( + f + for f in subpath.all_paths() + if f.is_file and not ignore_paths(f) + ) + else: + yield from (f for f in subpath.all_paths() if f.is_file) + + def describe_match_short(self) -> str: + return f"All files beneath {self._directory}/ except .../<pkg>/examples" + + def 
def builtin_mode_normalization_rules(
    dpkg_architecture_variables: DpkgArchitectureBuildProcessValuesTable,
    dctrl_bin: BinaryPackage,
    substitution: Substitution,
) -> Iterable[Tuple[MatchRule, FileSystemMode]]:
    """Yield the built-in permission normalization rules (imported from dh_fixperms)

    Each item pairs a match rule with the mode to use for the paths it matches.
    The pairs are yielded in the order they are written below.
    NOTE(review): how a consumer resolves overlapping rules is not visible
    here - keep the order stable unless that has been checked.

    :param dpkg_architecture_variables: Used to determine the perl module directories
    :param dctrl_bin: The binary package the rules are generated for
    :param substitution: Used to resolve {{PACKAGE}} in the reportbug related paths
    """
    # Libraries, web assets, images, etc. are plain data files
    yield from (
        (
            MatchRule.from_path_or_glob(
                x,
                _PERMISSION_NORMALIZATION_SOURCE_DEFINITION,
                path_type=PathType.FILE,
            ),
            _STD_FILE_MODE,
        )
        for x in (
            "*.so.*",
            "*.so",
            "*.la",
            "*.a",
            "*.js",
            "*.css",
            "*.scss",
            "*.sass",
            "*.jpeg",
            "*.jpg",
            "*.png",
            "*.gif",
            "*.cmxs",
            "*.node",
        )
    )

    # Directories in which every file (recursively) is a plain data file
    yield from (
        (
            MatchRule.recursive_beneath_directory(
                x,
                _PERMISSION_NORMALIZATION_SOURCE_DEFINITION,
                path_type=PathType.FILE,
            ),
            _STD_FILE_MODE,
        )
        for x in (
            "usr/share/man",
            "usr/include",
            "usr/share/applications",
            "usr/share/lintian/overrides",
        )
    )

    # The dh_fixperms tool recurses for these directories, but probably should not (see #1006927)
    yield from (
        (
            MatchRule.from_path_or_glob(
                f"{x}/*",
                _PERMISSION_NORMALIZATION_SOURCE_DEFINITION,
                path_type=PathType.FILE,
            ),
            _PATH_FILE_MODE,
        )
        for x in (
            "usr/bin",
            "usr/bin/mh",
            "bin",
            "usr/sbin",
            "sbin",
            "usr/games",
            "usr/libexec",
            "etc/init.d",
        )
    )

    yield (
        # Strictly speaking, dh_fixperms does a recursive search but in practice, it does not matter.
        MatchRule.from_path_or_glob(
            "etc/sudoers.d/*",
            _PERMISSION_NORMALIZATION_SOURCE_DEFINITION,
            path_type=PathType.FILE,
        ),
        OctalMode(0o440),
    )

    # The reportbug rule
    yield (
        ExactFileSystemPath(
            substitution.substitute(
                _normalize_path("usr/share/bug/{{PACKAGE}}"),
                _PERMISSION_NORMALIZATION_SOURCE_DEFINITION,
            )
        ),
        OctalMode(0o755),
    )

    yield (
        MatchRule.recursive_beneath_directory(
            "usr/share/bug/{{PACKAGE}}",
            _PERMISSION_NORMALIZATION_SOURCE_DEFINITION,
            path_type=PathType.FILE,
            substitution=substitution,
        ),
        OctalMode(0o644),
    )

    yield (
        ExactFileSystemPath(
            substitution.substitute(
                _normalize_path("usr/share/bug/{{PACKAGE}}/script"),
                _PERMISSION_NORMALIZATION_SOURCE_DEFINITION,
            )
        ),
        OctalMode(0o755),
    )

    yield (
        USR_SHARE_DOC_MATCH_RULE,
        OctalMode(0o0644),
    )

    yield from (
        (
            BasenameGlobMatch(
                "*.pm",
                only_when_in_directory=perl_dir,
                path_type=PathType.FILE,
                recursive_match=True,
            ),
            SymbolicMode.parse_filesystem_mode(
                "a-x",
                # The key is the quoted glob, written the same way as for "*.ali" below
                attribute_path['"*.pm"'],
            ),
        )
        for perl_dir in perl_module_dirs(dpkg_architecture_variables, dctrl_bin)
    )

    yield (
        BasenameGlobMatch(
            "*.ali",
            only_when_in_directory=_normalize_path("usr/lib"),
            path_type=PathType.FILE,
            recursive_match=True,
        ),
        SymbolicMode.parse_filesystem_mode(
            "a-w",
            attribute_path['"*.ali"'],
        ),
    )

    yield (
        SHEBANG_SCRIPTS,
        _PATH_FILE_MODE,
    )

    # Catch-all rule matching every path
    yield (
        MATCH_ANYTHING,
        SymbolicMode.parse_filesystem_mode(
            "go=rX,u+rw,a-s",
            attribute_path["**/*"],
        ),
    )
def _split_upstream_args(argv: List[str]) -> Tuple[List[str], List[str]]:
    """Split *argv* at the first "--" into (own arguments, pass-through arguments)

    The "--" itself is dropped.  Without a "--", *argv* is returned unchanged
    together with an empty pass-through list.
    """
    try:
        separator_index = argv.index("--")
    except ValueError:
        # list.index only raises ValueError (when "--" is absent)
        return argv, []
    return argv[:separator_index], argv[separator_index + 1 :]


def _add_materialize_deb_parser(subparsers) -> None:
    """Register the "materialize-deb" subcommand and its options on *subparsers*"""
    materialize_deb_parser = subparsers.add_parser(
        "materialize-deb",
        allow_abbrev=False,
        help="Generate .deb/.udebs structure from a root directory and"
        " an *intermediate* debputy manifest",
    )
    materialize_deb_parser.add_argument(
        "control_root_dir",
        metavar="control-root-dir",
        help="A directory that contains the control files (usually debian/<pkg>/DEBIAN)",
    )
    materialize_deb_parser.add_argument(
        "materialization_output",
        metavar="materialization_output",
        help="Where the resulting structure should be placed. Should not exist",
    )
    materialize_deb_parser.add_argument(
        "--discard-existing-output",
        dest="discard_existing_output",
        default=False,
        action="store_true",
        help="If passed, then the output location may exist."
        " If it does, it will be *deleted*.",
    )
    materialize_deb_parser.add_argument(
        "--source-date-epoch",
        dest="source_date_epoch",
        action="store",
        type=int,
        default=None,
        help="Source date epoch (can also be given via the SOURCE_DATE_EPOCH environ"
        " variable)",
    )
    materialize_deb_parser.add_argument(
        "--may-move-control-files",
        dest="may_move_control_files",
        action="store_true",
        default=False,
        help="Whether the command may optimize by moving (rather than copying) DEBIAN files",
    )
    materialize_deb_parser.add_argument(
        "--may-move-data-files",
        dest="may_move_data_files",
        action="store_true",
        default=False,
        help="Whether the command may optimize by moving (rather than copying) when materializing",
    )

    materialize_deb_parser.add_argument(
        "--intermediate-package-manifest",
        dest="package_manifest",
        metavar="JSON_FILE",
        action="store",
        default=None,
        help="INTERMEDIATE package manifest (JSON!)",
    )

    materialize_deb_parser.add_argument(
        "--udeb",
        dest="udeb",
        default=False,
        action="store_true",
        help="Whether this is a udeb package. Affects extension and default compression",
    )

    materialize_deb_parser.add_argument(
        "--build-method",
        dest="build_method",
        choices=["debputy", "dpkg-deb"],
        type=str,
        default=None,
        help="Immediately assemble the deb as well using the selected method",
    )
    materialize_deb_parser.add_argument(
        "--assembled-deb-output",
        dest="assembled_deb_output",
        type=str,
        default=None,
        help="Where to place the resulting deb. Only applicable with --build-method",
    )

    # Added for "help only" - you cannot trigger this option in practice
    # (everything from "--" onwards is cut off before argparse sees it).
    materialize_deb_parser.add_argument(
        "--",
        metavar="DPKG_DEB_ARGS",
        action="extend",
        nargs="+",
        dest="unused",
        help="Arguments to be passed to dpkg-deb"
        " (same as you might pass to dh_builddeb).",
    )


def _add_build_materialized_deb_parser(subparsers) -> None:
    """Register the "build-materialized-deb" subcommand and its options on *subparsers*"""
    build_deb_structure = subparsers.add_parser(
        "build-materialized-deb",
        allow_abbrev=False,
        help="Produce a .deb from a directory produced by the"
        " materialize-deb command",
    )
    build_deb_structure.add_argument(
        "materialized_deb_root_dir",
        metavar="materialized-deb-root-dir",
        help="The output directory of the materialize-deb command",
    )
    build_deb_structure.add_argument(
        "build_method",
        metavar="build-method",
        choices=["debputy", "dpkg-deb"],
        type=str,
        # NOTE(review): without nargs="?" this positional is mandatory, so the
        # default below never takes effect.  Kept as-is to avoid changing the CLI.
        default="dpkg-deb",
        help="Which tool should assemble the deb",
    )
    build_deb_structure.add_argument(
        "--output", type=str, default=None, help="Where to place the resulting deb"
    )


def parse_args() -> argparse.Namespace:
    """Parse sys.argv for the deb materialization tool

    Everything after the first "--" on the command line is withheld from
    argparse and provided verbatim as the "upstream_args" attribute of the
    returned namespace (an empty list when there is no "--").

    :return: The parsed arguments, including the "command" that was selected
    """
    description = textwrap.dedent(
        """\
        This is a low level tool for materializing deb packages from intermediate debputy manifests or assembling
        the deb from a materialization.

        The tool is not intended to be run directly by end users.
        """
    )

    parser = ColorizedArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        allow_abbrev=False,
        prog=program_name(),
    )

    parser.add_argument("--version", action="version", version=__version__)

    subparsers = parser.add_subparsers(dest="command", required=True)
    _add_materialize_deb_parser(subparsers)
    _add_build_materialized_deb_parser(subparsers)

    argv, upstream_args = _split_upstream_args(sys.argv)
    parsed_args = parser.parse_args(argv[1:])
    parsed_args.upstream_args = upstream_args

    return parsed_args
collections.defaultdict(list) + copies = [] + renames = [] + + for tar_member in intermediate_manifest: + member_path = strip_path_prefix(tar_member.member_path) + new_fs_path = ( + os.path.join("deb-root", member_path) if member_path else "deb-root" + ) + materialization_path = ( + f"{output_packaging_root}/{member_path}" + if member_path + else output_packaging_root + ) + replacement_tar_member = tar_member + materialization_parent_dir = os.path.dirname(materialization_path.rstrip("/")) + if tar_member.path_type == PathType.DIRECTORY: + directories.append(materialization_path) + elif tar_member.path_type == PathType.SYMLINK: + symlinks.append((tar_member.link_target, materialization_path)) + elif tar_member.fs_path is not None: + if tar_member.link_target: + # Not sure if hardlinks gets here yet as we do not support hardlinks + _error("Internal error; hardlink not supported") + + if may_move_data_files and tar_member.may_steal_fs_path: + renames.append((tar_member.fs_path, materialization_path)) + elif os.path.basename(tar_member.fs_path) == os.path.basename( + materialization_path + ): + bulk_copies[materialization_parent_dir].append(tar_member.fs_path) + else: + copies.append((tar_member.fs_path, materialization_path)) + else: + _error(f"Internal error; unsupported path type {tar_member.path_type}") + + if tar_member.fs_path is not None: + replacement_tar_member = tar_member.clone_and_replace( + fs_path=new_fs_path, may_steal_fs_path=False + ) + + replacement_manifest_paths.append( + (materialization_path, replacement_tar_member) + ) + + if len(directories) > 1: + _run(directories) + + for dest_dir, files in bulk_copies.items(): + cmd = ["cp", "--reflink=auto", "-t", dest_dir] + cmd.extend(files) + _run(cmd) + + for source, dest in copies: + _run(["cp", "--reflink=auto", source, dest]) + + for source, dest in renames: + print_command("mv", source, dest) + os.rename(source, dest) + + for link_target, link_path in symlinks: + print_command("ln", "-s", 
def materialize_deb(
    control_root_dir: str,
    intermediate_manifest_path: Optional[str],
    source_date_epoch: int,
    dpkg_deb_options: List[str],
    is_udeb: bool,
    output_dir: str,
    may_move_control_files: bool,
    may_move_data_files: bool,
) -> None:
    """Materialize a package into *output_dir* for later assembly into a deb

    The following is created inside *output_dir* (which must not exist yet):

     * "deb-root": the data.tar content plus the control files in "DEBIAN"
     * "deb-structure-intermediate-manifest.json": the manifest rewritten to
       reference the materialized paths
     * "env-and-cli.json": the source date epoch, the DPKG_DEB_* environment
       variables, the dpkg-deb options and the udeb flag

    :param control_root_dir: Directory with the control files; must contain "control"
    :param intermediate_manifest_path: Path to the intermediate manifest
    :param source_date_epoch: Recorded (as a string) in "env-and-cli.json"
    :param dpkg_deb_options: Recorded in "env-and-cli.json"
    :param is_udeb: Recorded in "env-and-cli.json"
    :param output_dir: Where to place the result
    :param may_move_control_files: Rename the control dir rather than copying its content
    :param may_move_data_files: Passed on to the data.tar materialization
    """
    control_file = f"{control_root_dir}/control"
    if not os.path.isfile(control_file):
        _error(
            f'The directory "{control_root_dir}" does not look like a package root dir (there is no control file)'
        )
    intermediate_manifest: List[TarMember] = parse_manifest(intermediate_manifest_path)

    deb_root = os.path.join(output_dir, "deb-root")
    os.mkdir(output_dir)

    materialized = _perform_data_tar_materialization(
        deb_root, intermediate_manifest, may_move_data_files
    )
    # Walk the entries in reverse manifest order when applying mode and mtime.
    for fs_path, member in reversed(materialized):
        # TODO: Hardlinks should probably skip these commands
        is_symlink = member.path_type == PathType.SYMLINK
        if not is_symlink:
            os.chmod(fs_path, member.mode, follow_symlinks=False)
        timestamps = (member.mtime, member.mtime)
        os.utime(fs_path, timestamps, follow_symlinks=False)

    debian_dir = f"{deb_root}/DEBIAN"
    if not may_move_control_files:
        os.mkdir(debian_dir)
        control_files = [
            os.path.join(control_root_dir, f) for f in os.listdir(control_root_dir)
        ]
        _run(["cp", "-a", *control_files, debian_dir])
    else:
        print_command("mv", control_root_dir, debian_dir)
        os.rename(control_root_dir, debian_dir)

    manifest_output_path = os.path.join(
        output_dir, "deb-structure-intermediate-manifest.json"
    )
    output_intermediate_manifest(
        manifest_output_path,
        [member for _, member in materialized],
    )

    with open(os.path.join(output_dir, "env-and-cli.json"), "w") as fd:
        # Unset DPKG_DEB_* variables are recorded as null
        recorded_env = {
            "SOURCE_DATE_EPOCH": str(source_date_epoch),
            "DPKG_DEB_COMPRESSOR_LEVEL": os.environ.get("DPKG_DEB_COMPRESSOR_LEVEL"),
            "DPKG_DEB_COMPRESSOR_TYPE": os.environ.get("DPKG_DEB_COMPRESSOR_TYPE"),
            "DPKG_DEB_THREADS_MAX": os.environ.get("DPKG_DEB_THREADS_MAX"),
        }
        json.dump(
            {
                "env": recorded_env,
                "cli": {"dpkg-deb": dpkg_deb_options},
                "udeb": is_udeb,
            },
            fd,
        )
Note, when verifying this, use os.chown + os.stat from python" + " (the chmod/stat shell commands might use a different syscall that fakeroot accurately emulates)" + ) + + +def _dpkg_deb_root_requirements( + intermediate_manifest: List[TarMember], +) -> Tuple[List[str], bool, bool]: + needs_root = any(tm.uid != 0 or tm.gid != 0 for tm in intermediate_manifest) + if needs_root: + if os.getuid() != 0: + _error( + 'Must be run as root/fakeroot when using the method "dpkg-deb" due to the contents' + ) + is_using_fakeroot = detect_fakeroot() + deb_cmd = ["dpkg-deb"] + _info("Applying ownership, mode, and utime from the intermediate manifest...") + else: + # fakeroot does not matter in this case + is_using_fakeroot = False + deb_cmd = ["dpkg-deb", "--root-owner-group"] + _info("Applying mode and utime from the intermediate manifest...") + return deb_cmd, needs_root, is_using_fakeroot + + +@contextlib.contextmanager +def maybe_with_materialized_manifest( + content: Optional[List[TarMember]], +) -> Iterator[Optional[str]]: + if content is not None: + with tempfile.NamedTemporaryFile( + prefix="debputy-mat-build", + mode="w+t", + suffix=".json", + encoding="utf-8", + ) as fd: + output_intermediate_manifest_to_fd(fd, content) + fd.flush() + yield fd.name + else: + yield None + + +def _prep_assembled_deb_output_path( + output_path: Optional[str], + materialized_deb_structure: str, + deb_root: str, + method: str, + is_udeb: bool, +) -> str: + if output_path is None: + ext = "udeb" if is_udeb else "deb" + output_dir = os.path.join(materialized_deb_structure, "output") + if not os.path.isdir(output_dir): + os.mkdir(output_dir) + output = os.path.join(output_dir, f"{method}.{ext}") + elif os.path.isdir(output_path): + output = os.path.join( + output_path, + compute_output_filename(os.path.join(deb_root, "DEBIAN"), is_udeb), + ) + else: + output = output_path + return output + + +def _apply_env(env: Dict[str, Optional[str]]) -> None: + for name, value in env.items(): + if value is 
def assemble_deb(
    materialized_deb_structure: str,
    method: str,
    output_path: Optional[str],
    combined_materialization_and_assembly: bool,
) -> None:
    """Build a .deb/.udeb from a previously materialized deb structure.

    The materialization directory is expected to contain a ``deb-root``
    directory, an ``env-and-cli.json`` file (recorded environment, extra
    ``dpkg-deb`` options and the udeb flag) and
    ``deb-structure-intermediate-manifest.json``.

    :param materialized_deb_structure: Path to the materialization directory.
    :param method: Assembly method; ``"dpkg-deb"`` or ``"debputy"``.
    :param output_path: Requested output file or directory, or ``None`` for
      the default inside the materialization directory.
    :param combined_materialization_and_assembly: ``True`` when called right
      after materialization in the same run. In that case file system
      metadata is only re-applied for the ``dpkg-deb`` method when root is
      needed.
    """
    deb_root = os.path.join(materialized_deb_structure, "deb-root")

    with open(os.path.join(materialized_deb_structure, "env-and-cli.json"), "r") as fd:
        serial_format = json.load(fd)

    # Missing or empty sections are normalized to empty containers.
    env = serial_format.get("env") or {}
    cli = serial_format.get("cli") or {}
    is_udeb = serial_format.get("udeb")
    source_date_epoch = env.get("SOURCE_DATE_EPOCH")
    dpkg_deb_options = cli.get("dpkg-deb") or []
    intermediate_manifest_path = os.path.join(
        materialized_deb_structure, "deb-structure-intermediate-manifest.json"
    )
    original_intermediate_manifest = TarMember.parse_intermediate_manifest(
        intermediate_manifest_path
    )
    _info(
        "Rebasing relative paths in the intermediate manifest so they are relative to current working directory ..."
    )
    # Relative fs_path entries are re-anchored at the materialization
    # directory; absolute paths and path-less members are kept as they are.
    intermediate_manifest = [
        (
            tar_member.clone_and_replace(
                fs_path=os.path.join(materialized_deb_structure, tar_member.fs_path)
            )
            if tar_member.fs_path is not None and not tar_member.fs_path.startswith("/")
            else tar_member
        )
        for tar_member in original_intermediate_manifest
    ]
    # Only the "debputy" method consumes a manifest file; for other methods
    # no temporary manifest is written below.
    materialized_manifest = None
    if method == "debputy":
        materialized_manifest = intermediate_manifest

    if source_date_epoch is None:
        # NOTE(review): the code below presumably relies on _error() not
        # returning - confirm against its definition.
        _error(
            "Cannot reproduce the deb. No source date epoch provided in the materialized deb root."
        )
    # Replays the recorded environment into os.environ before running the
    # assembly command.
    _apply_env(env)

    output = _prep_assembled_deb_output_path(
        output_path,
        materialized_deb_structure,
        deb_root,
        method,
        is_udeb,
    )

    # The temporary manifest (if any) only exists inside this block, so the
    # assembly command has to run before leaving it.
    with maybe_with_materialized_manifest(materialized_manifest) as tmp_file:
        if method == "dpkg-deb":
            deb_cmd, needs_root, is_using_fakeroot = _dpkg_deb_root_requirements(
                intermediate_manifest
            )
            if needs_root or not combined_materialization_and_assembly:
                # The manifest is walked in reverse order when applying the
                # metadata to the paths below deb_root.
                for tar_member in reversed(intermediate_manifest):
                    p = os.path.join(
                        deb_root, strip_path_prefix(tar_member.member_path)
                    )
                    apply_fs_metadata(p, tar_member, needs_root, is_using_fakeroot)
        elif method == "debputy":
            deb_packer = os.path.join(DEBPUTY_ROOT_DIR, "deb_packer.py")
            assert tmp_file is not None
            deb_cmd = [
                deb_packer,
                "--intermediate-package-manifest",
                tmp_file,
                "--source-date-epoch",
                source_date_epoch,
            ]
        else:
            _error(f"Internal error: Unsupported assembly method: {method}")

        if is_udeb:
            deb_cmd.extend(["-z6", "-Zxz", "-Sextreme"])
        # Recorded options come after the udeb defaults on the command line.
        deb_cmd.extend(dpkg_deb_options)
        deb_cmd.extend(["--build", deb_root, output])
        start_time = datetime.now()
        _run(deb_cmd)
        end_time = datetime.now()
        _info(f" - assembly command took {end_time - start_time}")
Please either choose a non-existing path, delete the path" + " or use --discard-existing-output (to have this command remove it as necessary)." + ) + _info( + f'Removing existing path "{output_dir}" as requested by --discard-existing-output' + ) + _run(["rm", "-fr", output_dir]) + + materialize_deb( + parsed_args.control_root_dir, + parsed_args.package_manifest, + mtime, + dpkg_deb_args, + parsed_args.udeb, + output_dir, + parsed_args.may_move_control_files, + parsed_args.may_move_data_files, + ) + + if parsed_args.build_method is not None: + assemble_deb( + output_dir, + parsed_args.build_method, + parsed_args.assembled_deb_output, + True, + ) + + elif parsed_args.command == "build-materialized-deb": + assemble_deb( + parsed_args.materialized_deb_root_dir, + parsed_args.build_method, + parsed_args.output, + False, + ) + else: + _error(f'Internal error: Unimplemented command "{parsed_args.command}"') + + +if __name__ == "__main__": + main() diff --git a/src/debputy/commands/deb_packer.py b/src/debputy/commands/deb_packer.py new file mode 100644 index 0000000..8c61099 --- /dev/null +++ b/src/debputy/commands/deb_packer.py @@ -0,0 +1,557 @@ +#!/usr/bin/python3 -B +import argparse +import errno +import operator +import os +import stat +import subprocess +import tarfile +import textwrap +from typing import Optional, List, FrozenSet, Iterable, Callable, BinaryIO, cast + +from debputy.intermediate_manifest import TarMember, PathType +from debputy.util import ( + _error, + compute_output_filename, + resolve_source_date_epoch, + ColorizedArgumentParser, + setup_logging, + program_name, + assume_not_none, +) +from debputy.version import __version__ + + +# AR header / start of a deb file for reference +# 00000000 21 3c 61 72 63 68 3e 0a 64 65 62 69 61 6e 2d 62 |!<arch>.debian-b| +# 00000010 69 6e 61 72 79 20 20 20 31 36 36 38 39 37 33 36 |inary 16689736| +# 00000020 39 35 20 20 30 20 20 20 20 20 30 20 20 20 20 20 |95 0 0 | +# 00000030 31 30 30 36 34 34 20 20 34 20 20 20 20 
class ArMember:
    """A single member of the ar archive that makes up a .deb file.

    A member is either "fixed binary" (its complete content is given up front
    via ``fixed_binary``) or streamed (its content is produced on demand by
    the ``write_to_impl`` callback).
    """

    def __init__(
        self,
        name: str,
        mtime: int,
        fixed_binary: Optional[bytes] = None,
        write_to_impl: Optional[Callable[[BinaryIO], None]] = None,
    ) -> None:
        """
        :param name: Member name as stored in the ar header.
        :param mtime: Modification time recorded for the member.
        :param fixed_binary: Complete content of the member, if known up front.
        :param write_to_impl: Callback writing the member content to a binary
          file object; required for members without ``fixed_binary``.
        """
        self.name = name
        self._mtime = mtime
        self._write_to_impl = write_to_impl
        self.fixed_binary = fixed_binary

    @property
    def is_fixed_binary(self) -> bool:
        """Whether the content is available as ``fixed_binary``."""
        return self.fixed_binary is not None

    @property
    def mtime(self) -> int:
        """The modification time passed to the constructor."""
        # Must read the private attribute; returning ``self.mtime`` here would
        # re-enter this property and recurse until RecursionError.
        return self._mtime

    def write_to(self, fd: BinaryIO) -> None:
        """Write the member content to ``fd`` using the ``write_to_impl`` callback."""
        writer = self._write_to_impl
        assert writer is not None
        writer(fd)
def _generate_tar_file(
    tar_members: Iterable[TarMember],
    compression_cmd: List[str],
    write_to: BinaryIO,
) -> None:
    """Write a tar archive of ``tar_members`` to ``write_to`` via a compressor.

    The tar stream is piped into the stdin of ``compression_cmd`` and the
    command's stdout is connected directly to ``write_to``.

    :param tar_members: Members to add, in the order they should appear.
    :param compression_cmd: Command line of the filter process (for example an
      ``xz`` invocation, or ``cat`` for no compression).
    :param write_to: Binary file object receiving the command's output.
    """
    with (
        subprocess.Popen(
            compression_cmd, stdin=subprocess.PIPE, stdout=write_to
        ) as compress_proc,
        # "w|" writes the archive as a non-seekable stream, which is what a
        # pipe requires.
        tarfile.open(
            mode="w|",
            fileobj=compress_proc.stdin,
            format=tarfile.GNU_FORMAT,
            errorlevel=1,
        ) as tar_fd,
    ):
        for tar_member in tar_members:
            tar_info: tarfile.TarInfo = tar_member.create_tar_info(tar_fd)
            # Only regular files carry content; every other path type is
            # added as a header-only entry.
            if tar_member.path_type == PathType.FILE:
                with open(assume_not_none(tar_member.fs_path), "rb") as mfd:
                    tar_fd.addfile(tar_info, fileobj=mfd)
            else:
                tar_fd.addfile(tar_info)
    # Leaving the "with" block has closed the tar stream and the pipe to the
    # command, so the exit status can be collected here.
    compress_proc.wait()
    if compress_proc.returncode != 0:
        _error(
            f"Compression command {compression_cmd} failed with code {compress_proc.returncode}"
        )
class Compression:
    """Description of one supported compression format.

    Bundles the default level, the file name extension, the strategies the
    format accepts and the function that builds the command line for it.
    """

    def __init__(
        self,
        default_compression_level: int,
        extension: str,
        allowed_strategies: FrozenSet[str],
        cmdline_builder: Callable[
            ["Compression", Optional[argparse.Namespace]], List[str]
        ],
    ) -> None:
        """
        :param default_compression_level: Level used when none was requested.
        :param extension: Suffix appended to tar file names (may be empty).
        :param allowed_strategies: Strategy names valid for this format.
        :param cmdline_builder: Callable producing the command line from this
          instance and the (optional) parsed arguments.
        """
        self.cmdline_builder = cmdline_builder
        self.allowed_strategies = allowed_strategies
        self.extension = extension
        self.default_compression_level = default_compression_level

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"<{class_name} {self.extension}>"

    def effective_compression_level(
        self, parsed_args: Optional[argparse.Namespace]
    ) -> int:
        """Return the requested compression level, or the default if unset."""
        if not parsed_args:
            return self.default_compression_level
        requested_level = parsed_args.compression_level
        if requested_level is None:
            return self.default_compression_level
        return cast("int", requested_level)

    def as_cmdline(self, parsed_args: Optional[argparse.Namespace]) -> List[str]:
        """Build the command line for this format via ``cmdline_builder``."""
        build = self.cmdline_builder
        return build(self, parsed_args)

    def with_extension(self, filename: str) -> str:
        """Return ``filename`` with this format's extension appended."""
        suffix = self.extension
        return filename + suffix
def _normalize_compression_args(parsed_args: argparse.Namespace) -> argparse.Namespace:
    """Normalize and validate the compression related arguments.

    ``-Zgzip -z0`` is rewritten to the ``none`` algorithm (with a note printed
    to stdout) and an explicitly requested strategy is checked against the
    strategies supported by the selected algorithm.

    :param parsed_args: Parsed arguments; modified in place.
    :return: The same namespace instance.
    """
    wants_level_zero = parsed_args.compression_level == 0
    if wants_level_zero and parsed_args.compression_algorithm == "gzip":
        print(
            "Note: Mapping compression algorithm to none for compatibility with dpkg-deb (due to -Zgzip -z0)"
        )
        parsed_args.compression_algorithm = "none"

    algorithm = COMPRESSIONS[parsed_args.compression_algorithm]
    requested_strategy = parsed_args.compression_strategy
    if requested_strategy is None:
        return parsed_args
    if requested_strategy in algorithm.allowed_strategies:
        return parsed_args

    _error(
        f'Compression algorithm "{parsed_args.compression_algorithm}" does not support compression strategy'
        f' "{requested_strategy}". Allowed values: {", ".join(sorted(algorithm.allowed_strategies))}'
    )
    return parsed_args
+ + As the tool is not meant to be used directly, it is full of annoying paper cuts that + I refuse to fix or maintain. Use the high level tool instead. + + """ + ) + + parser = ColorizedArgumentParser( + description=description, + formatter_class=argparse.RawDescriptionHelpFormatter, + allow_abbrev=False, + prog=program_name(), + ) + parser.add_argument("--version", action="version", version=__version__) + parser.add_argument( + "package_root_dir", + metavar="PACKAGE_ROOT_DIR", + help="Root directory of the package. Must contain a DEBIAN directory", + ) + parser.add_argument( + "package_output_path", + metavar="PATH", + help="Path where the package should be placed. If it is directory," + " the base name will be determined from the package metadata", + ) + + parser.add_argument( + "--intermediate-package-manifest", + dest="package_manifest", + metavar="JSON_FILE", + action="store", + default=None, + help="INTERMEDIATE package manifest (JSON!)", + ) + parser.add_argument( + "--root-owner-group", + dest="root_owner_group", + action="store_true", + help="Ignored. Accepted for compatibility with dpkg-deb -b", + ) + parser.add_argument( + "-b", + "--build", + dest="build_param", + action="store_true", + help="Ignored. 
Accepted for compatibility with dpkg-deb", + ) + parser.add_argument( + "--source-date-epoch", + dest="source_date_epoch", + action="store", + type=int, + default=None, + help="Source date epoch (can also be given via the SOURCE_DATE_EPOCH environ variable", + ) + parser.add_argument( + "-Z", + dest="compression_algorithm", + choices=COMPRESSIONS, + default=compression_type, + help="The compression algorithm to be used", + ) + parser.add_argument( + "-z", + dest="compression_level", + metavar="{0-9}", + choices=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + default=compression_level_default, + type=int, + help="The compression level to be used", + ) + parser.add_argument( + "-S", + dest="compression_strategy", + # We have a different default for xz when strategy is unset and we are building a udeb + action="store", + default=None, + help="The compression algorithm to be used. Concrete values depend on the compression" + ' algorithm, but the value "none" is always allowed', + ) + parser.add_argument( + "--uniform-compression", + dest="uniform_compression", + action="store_true", + default=True, + help="Whether to use the same compression for the control.tar and the data.tar." 
def _ctrl_member(
    member_path: str,
    fs_path: Optional[str] = None,
    path_type: PathType = PathType.FILE,
    mode: int = 0o644,
    mtime: int = 0,
) -> TarMember:
    """Create a root-owned tar member for the control archive.

    :param member_path: Path inside the archive; must start with ``./`` when
      ``fs_path`` is omitted.
    :param fs_path: File system path backing the member. Defaults to the
      member path with its leading ``.`` replaced by ``DEBIAN``.
    :param path_type: Type of the member.
    :param mode: Permission bits of the member.
    :param mtime: Modification time of the member.
    :return: The tar member, owned by ``root:root`` (0:0).
    """
    backing_path = fs_path
    if backing_path is None:
        assert member_path.startswith("./")
        # "./name" -> "DEBIAN/name"
        backing_path = f"DEBIAN{member_path[1:]}"
    return TarMember(
        member_path=member_path,
        path_type=path_type,
        fs_path=backing_path,
        mode=mode,
        owner="root",
        uid=0,
        group="root",
        gid=0,
        mtime=mtime,
    )
def main() -> None:
    """Command line entry point: parse the arguments and build the package.

    The data.tar compression follows the ``-Z``/``-z``/``-S`` options. The
    control.tar uses the same compression unless uniform compression was
    disabled, in which case it is compressed with the gzip defaults.
    When the output path ends with ``/`` or is an existing directory, the
    file name is computed from the package's ``DEBIAN`` directory.
    """
    setup_logging()
    parsed_args = parse_args()
    root_dir: str = parsed_args.package_root_dir
    output_path: str = parsed_args.package_output_path
    mtime = resolve_source_date_epoch(parsed_args.source_date_epoch)

    data_compression: Compression = COMPRESSIONS[parsed_args.compression_algorithm]
    data_compression_cmd = data_compression.as_cmdline(parsed_args)
    if parsed_args.uniform_compression:
        ctrl_compression = data_compression
        ctrl_compression_cmd = data_compression_cmd
    else:
        ctrl_compression = COMPRESSIONS["gzip"]
        # No parsed args: the control.tar uses the gzip defaults rather than
        # the level/strategy requested for the data.tar.
        ctrl_compression_cmd = ctrl_compression.as_cmdline(None)

    if output_path.endswith("/") or os.path.isdir(output_path):
        deb_file = os.path.join(
            output_path,
            compute_output_filename(os.path.join(root_dir, "DEBIAN"), False),
        )
    else:
        deb_file = output_path

    pack(
        deb_file,
        ctrl_compression,
        data_compression,
        root_dir,
        parsed_args.package_manifest,
        mtime,
        ctrl_compression_cmd,
        data_compression_cmd,
        # --debug is documented as enabling raw stack traces on errors, so
        # raw exceptions are preferred exactly when debug mode is on (the
        # flag was previously passed negated).
        prefer_raw_exceptions=parsed_args.debug_mode,
    )
ctrl_compression.with_extension("control.tar"), + mtime, + write_to_impl=generate_tar_file_member( + _ctrl_tar_members(root_dir, mtime), + ctrl_compression_cmd, + ), + ), + ArMember( + data_compression.with_extension("data.tar"), + mtime, + write_to_impl=generate_tar_file_member( + data_tar_members, + data_compression_cmd, + ), + ), + ] + generate_ar_archive(deb_file, mtime, members, prefer_raw_exceptions) + + +if __name__ == "__main__": + main() diff --git a/src/debputy/commands/debputy_cmd/__init__.py b/src/debputy/commands/debputy_cmd/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/debputy/commands/debputy_cmd/__init__.py diff --git a/src/debputy/commands/debputy_cmd/__main__.py b/src/debputy/commands/debputy_cmd/__main__.py new file mode 100644 index 0000000..d894731 --- /dev/null +++ b/src/debputy/commands/debputy_cmd/__main__.py @@ -0,0 +1,1576 @@ +#!/usr/bin/python3 -B +import argparse +import json +import os +import shutil +import stat +import subprocess +import sys +import textwrap +import traceback +from tempfile import TemporaryDirectory +from typing import ( + List, + Dict, + Iterable, + Any, + Tuple, + Sequence, + Optional, + NoReturn, + Mapping, + Union, + NamedTuple, + Literal, + Set, + Iterator, + TypedDict, + NotRequired, + cast, +) + +from debputy import DEBPUTY_ROOT_DIR, DEBPUTY_PLUGIN_ROOT_DIR +from debputy.commands.debputy_cmd.context import ( + CommandContext, + add_arg, + ROOT_COMMAND, + CommandArg, +) +from debputy.commands.debputy_cmd.dc_util import flatten_ppfs +from debputy.commands.debputy_cmd.output import _stream_to_pager +from debputy.dh_migration.migrators import MIGRATORS +from debputy.exceptions import ( + DebputyRuntimeError, + PluginNotFoundError, + PluginAPIViolationError, + PluginInitializationError, + UnhandledOrUnexpectedErrorFromPluginError, + SymlinkLoopError, +) +from debputy.package_build.assemble_deb import ( + assemble_debs, +) +from debputy.packager_provided_files import ( + 
detect_all_packager_provided_files, + PackagerProvidedFile, +) +from debputy.plugin.api.spec import ( + VirtualPath, + packager_provided_file_reference_documentation, +) + +try: + from argcomplete import autocomplete +except ImportError: + + def autocomplete(_parser: argparse.ArgumentParser) -> None: + pass + + +from debputy.version import __version__ +from debputy.filesystem_scan import ( + FSROOverlay, +) +from debputy.plugin.api.impl_types import ( + PackagerProvidedFileClassSpec, + DebputyPluginMetadata, + PluginProvidedKnownPackagingFile, + KNOWN_PACKAGING_FILE_CATEGORY_DESCRIPTIONS, + KNOWN_PACKAGING_FILE_CONFIG_FEATURE_DESCRIPTION, + expand_known_packaging_config_features, + InstallPatternDHCompatRule, + KnownPackagingFileInfo, +) +from debputy.plugin.api.impl import ( + find_json_plugin, + find_tests_for_plugin, + find_related_implementation_files_for_plugin, + parse_json_plugin_desc, + plugin_metadata_for_debputys_own_plugin, +) +from debputy.dh_migration.migration import migrate_from_dh +from debputy.dh_migration.models import AcceptableMigrationIssues +from debputy.packages import BinaryPackage +from debputy.debhelper_emulation import ( + dhe_pkgdir, + parse_drules_for_addons, + extract_dh_addons_from_control, +) + +from debputy.deb_packaging_support import ( + usr_local_transformation, + handle_perl_code, + detect_systemd_user_service_files, + fixup_debian_changelog_and_news_file, + install_upstream_changelog, + relocate_dwarves_into_dbgsym_packages, + run_package_processors, + cross_package_control_files, +) +from debputy.util import ( + _error, + _warn, + ColorizedArgumentParser, + setup_logging, + _info, + escape_shell, + program_name, + integrated_with_debhelper, + assume_not_none, +) + +REFERENCE_DATA_TABLE = { + "config-features": KNOWN_PACKAGING_FILE_CONFIG_FEATURE_DESCRIPTION, + "file-categories": KNOWN_PACKAGING_FILE_CATEGORY_DESCRIPTIONS, +} + + +class SharedArgument(NamedTuple): + """ + Information about an argument shared between a parser 
class Namespace(argparse.Namespace):
    """
    Namespace variant that merges values assigned more than once

    Attributes listed in ``_shared_args`` (provided by the subclass created in
    the parser) are combined according to their argparse action instead of
    being overwritten; all other attributes behave as usual.

    Based on: https://www.enricozini.org/blog/2022/python/sharing-argparse-arguments-with-subcommands/
    """

    def __setattr__(self, name: str, value: Any) -> None:
        shared = self._shared_args.get(name)
        if shared is None:
            # Not a shared argument: plain assignment
            return super().__setattr__(name, value)

        assign = super().__setattr__
        kind = shared.kwargs.get("action")

        if kind == "store_true":
            # OR values
            assign(name, getattr(self, name, False) or value)
            return None

        if kind == "store_false":
            # AND values
            assign(name, getattr(self, name, True) and value)
            return None

        if kind == "append":
            collected = getattr(self, name, None)
            if collected is None:
                collected = []
                assign(name, collected)
            if isinstance(value, list):
                collected.extend(value)
            elif value is not None:
                collected.append(value)
            return None

        if kind == "store":
            previous = getattr(self, name, None)
            if previous is None:
                assign(name, value)
            elif previous != value and value is not None:
                raise argparse.ArgumentError(
                    None,
                    f"conflicting values provided for {shared.action.dest!r} ({previous!r} and {value!r})",
                )
            return None

        raise NotImplementedError(
            f"Action {kind!r} for {shared.action.dest!r} is not supported"
        )
**a.kwargs) + + def add_argument(self, *args: Any, **kw: Any) -> Any: + shared = kw.pop("shared", False) + res = super().add_argument(*args, **kw) + if shared: + action = kw.get("action") + if action not in ("store", "store_true", "store_false", "append"): + raise NotImplementedError( + f"Action {action!r} for {args!r} is not supported" + ) + # Take note of the argument if it was marked as shared + self.shared_args[res.dest] = SharedArgument(res, args, kw) + return res + + def add_subparsers(self, *args: Any, **kw: Any) -> Any: + if "parser_class" not in kw: + kw["parser_class"] = type( + "ArgumentParser", + (self.__class__,), + {"shared_args": dict(self.shared_args)}, + ) + return super().add_subparsers(*args, **kw) + + def parse_args(self, *args: Any, **kw: Any) -> Any: + if "namespace" not in kw: + # Use a subclass to pass the special action list without making it + # appear as an argument + kw["namespace"] = type( + "Namespace", (Namespace,), {"_shared_args": self.shared_args} + )() + return super().parse_args(*args, **kw) + + +def _add_common_args(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "--debputy-manifest", + dest="debputy_manifest", + action="store", + default=None, + help="Specify another `debputy` manifest (default: debian/debputy.manifest)", + shared=True, + ) + + parser.add_argument( + "-d", + "--debug", + dest="debug_mode", + action="store_true", + default=False, + help="Enable debug logging and raw stack traces on errors. Some warnings become errors as a consequence.", + shared=True, + ) + + parser.add_argument( + "--no-pager", + dest="pager", + action="store_false", + default=True, + help="For subcommands that can use a pager, disable the use of pager. Some output formats implies --no-pager", + shared=True, + ) + + parser.add_argument( + "--plugin", + dest="required_plugins", + action="append", + type=str, + default=[], + help="Request the plugin to be loaded. Can be used multiple time." 
+ " Ignored for some commands (such as autopkgtest-test-runner)", + shared=True, + ) + + +def _add_packages_args(parser: argparse.ArgumentParser) -> None: + parser.add_argument( + "-p", + "--package", + dest="packages", + action="append", + type=str, + default=[], + help="The package(s) to act on. Affects default permission normalization rules", + ) + + +internal_commands = ROOT_COMMAND.add_dispatching_subcommand( + "internal-command", + dest="internal_command", + metavar="command", + help_description="Commands used for internal purposes. These are implementation details and subject to change", +) +tool_support_commands = ROOT_COMMAND.add_dispatching_subcommand( + "tool-support", + help_description="Tool integration commands. These are intended to have stable output and behaviour", + dest="tool_subcommand", + metavar="command", +) + + +def parse_args() -> argparse.Namespace: + description = textwrap.dedent( + """\ + The `debputy` program is a manifest-based Debian packaging tool. + + It is used as a part of compiling a source package and transforming it into one or + more binary (.deb) packages. + + If you are using a screen reader, consider exporting setting the environment variable + OPTIMIZE_FOR_SCREEN_READER=1. This will remove some of the visual formatting and some + commands will render the output in a purely textual manner rather than visual layout. 
def _install_plugin_from_plugin_metadata(
    plugin_metadata: DebputyPluginMetadata,
    dest_dir: str,
) -> None:
    """Copy a plugin descriptor and its related files into the plugin directory.

    The target is ``<dest_dir>/<DEBPUTY_PLUGIN_ROOT_DIR>/debputy/plugins``; it
    is created when missing. The process umask is set to 022 before the
    directory is created and the files are copied with ``cp``.

    :param plugin_metadata: Metadata of the plugin to install.
    :param dest_dir: Staging prefix for the installation (may be empty).
    """
    implementation_files = find_related_implementation_files_for_plugin(
        plugin_metadata
    )
    # Collapse the "//" that appears when the two parts are glued together
    plugin_root = f"{dest_dir}/{DEBPUTY_PLUGIN_ROOT_DIR}".replace("//", "/")
    target_dir = os.path.join(plugin_root, "debputy", "plugins")

    os.umask(0o022)
    os.makedirs(target_dir, exist_ok=True)
    copy_cmd = [
        "cp",
        "--reflink=auto",
        "-t",
        target_dir,
        *implementation_files,
        plugin_metadata.plugin_path,
    ]
    _info(f" {escape_shell(*copy_cmd)}")
    subprocess.check_call(
        copy_cmd,
        stdin=subprocess.DEVNULL,
    )
@internal_commands.register_subcommand(
    "install-plugin",
    help_description="[Internal command] Install a plugin and related files",
    requested_plugins_only=True,
    argparser=[
        add_arg("target_plugin", metavar="PLUGIN", action="store"),
        add_arg(
            "--dest-dir",
            dest="dest_dir",
            default="",
            action="store",
        ),
    ],
)
def _install_plugin(context: CommandContext) -> None:
    """Install the plugin described by the JSON descriptor given on the command line.

    The ``PLUGIN`` argument must be an existing file; the plugin is installed
    below the directory given via ``--dest-dir``.
    """
    descriptor_path = context.parsed_args.target_plugin
    descriptor_is_file = os.path.isfile(descriptor_path)
    if not descriptor_is_file:
        _error(
            f'The value "{descriptor_path}" must be a file. It should be the JSON descriptor of'
            f" the plugin."
        )
    metadata = parse_json_plugin_desc(descriptor_path)
    _install_plugin_from_plugin_metadata(
        metadata,
        context.parsed_args.dest_dir,
    )
@ROOT_COMMAND.register_subcommand(
    "autopkgtest-test-runner",
    requested_plugins_only=True,
    help_description="Detect tests in the debian dir and run them against installed plugins",
)
def _autodep8_test_runner(context: CommandContext) -> None:
    """Run the plugin tests found in the source package against the installed plugins.

    The tests are located via ``_find_plugins_and_tests_in_source_package`` and
    run through ``_run_tests`` with ``test_plugin_location="installed"``.
    Problems (py.test missing, no tests found, failing tests) are reported
    through ``_error``, with hints tailored to how the command was started.

    NOTE(review): the control flow relies on ``_error`` not returning - confirm
    against its definition in ``debputy.util``.

    :param context: Command context for the unpacked source package.
    """
    # Without AUTOPKGTEST_TMP in the environment, the command is treated as
    # having been started by hand rather than by autopkgtest.
    ad_hoc_run = "AUTOPKGTEST_TMP" not in os.environ
    # Only the list of tests is needed here; the other two results are unused.
    _a, _b, all_tests = _find_plugins_and_tests_in_source_package(context)

    source_package = context.source_package()
    # True when debian/control lists "autopkgtest-pkg-debputy" in Testsuite.
    explicit_test = (
        "autopkgtest-pkg-debputy" in source_package.fields.get("Testsuite", "").split()
    )

    if not shutil.which("py.test"):
        if ad_hoc_run:
            extra_context = ""
            if not explicit_test:
                extra_context = (
                    " Remember to add python3-pytest to the Depends field of your autopkgtests field if"
                    " you are writing your own test case for autopkgtest. Note you can also add"
                    ' "autopkgtest-pkg-debputy" to the "Testsuite" field in debian/control if you'
                    " want the test case autogenerated."
                )
            _error(
                f"Please install the py.test command (apt-get install python3-pytest).{extra_context}"
            )
        # Reached for runs under autopkgtest: the test dependencies are incomplete.
        _error("Please add python3-pytest to the Depends field of your autopkgtests.")

    if not all_tests:
        extra_context = ""
        if explicit_test:
            extra_context = (
                " If the package no longer provides any plugin or tests, please remove the "
                ' "autopkgtest-pkg-debputy" test from the "Testsuite" in debian/control'
            )
        _error(
            "There are no tests to be run. The autodep8 feature should not have generated a test for"
            f" this case.{extra_context}"
        )

    # on_error_return=False makes _run_tests hand back False on a failing run
    # instead of reporting the error itself, so the hint below can be added.
    if _run_tests(
        context,
        all_tests,
        test_plugin_location="installed",
        on_error_return=False,
    ):
        return
    extra_context = ""
    if not ad_hoc_run:
        extra_context = (
            ' These tests can be run manually via the "debputy autopkgtest-test-runner" command without any'
            ' autopkgtest layering. To do so, install "dh-debputy python3-pytest" plus the packages'
            " being tested and relevant extra dependencies required for the tests. Then open a shell in"
            f' the unpacked source directory of {source_package.name} and run "debputy autopkgtest-test-runner"'
        )
    _error(f"The tests were not successful.{extra_context}")
def _run_tests(
    context: CommandContext,
    test_paths: List[str],
    *,
    cwd: Optional[str] = None,
    tmpdir_root: Optional[str] = None,
    test_plugin_location: Literal["installed", "uninstalled"] = "uninstalled",
    on_error_return: Optional[Any] = None,
    on_success_return: Optional[Any] = True,
) -> Any:
    """Run ``py.test`` on the given test paths and report the outcome.

    The tests run with ``DEBPUTY_ROOT_DIR`` prepended to ``PYTHONPATH``, with
    byte-code writing disabled and with ``DEBPUTY_TEST_PLUGIN_LOCATION`` set to
    ``test_plugin_location``. The pytest cache is placed in a temporary
    directory that is removed afterwards.

    :param context: Command context; ``parsed_args.debug_mode`` selects the
      pytest verbosity.
    :param test_paths: Paths handed to ``py.test`` as positional arguments.
    :param cwd: Working directory (and pytest ``--rootdir``) for the test run.
      When None, the current directory is used.
    :param tmpdir_root: Directory in which the temporary cache directory is
      created. When None, the ``TemporaryDirectory`` default applies.
    :param test_plugin_location: Value exported as ``DEBPUTY_TEST_PLUGIN_LOCATION``.
    :param on_error_return: Value returned when the tests fail. When None, the
      failure is reported via ``_error`` instead.
    :param on_success_return: Value returned when the tests pass.
    :return: ``on_success_return`` on success, ``on_error_return`` on failure.
    """
    env = dict(os.environ)
    env["DEBPUTY_TEST_PLUGIN_LOCATION"] = test_plugin_location
    if "PYTHONPATH" in env:
        env["PYTHONPATH"] = f"{DEBPUTY_ROOT_DIR}:{env['PYTHONPATH']}"
    else:
        env["PYTHONPATH"] = str(DEBPUTY_ROOT_DIR)

    env["PYTHONDONTWRITEBYTECODE"] = "1"
    _info("Running debputy plugin tests.")
    _info("")
    _info("Environment settings:")
    for envname in [
        "PYTHONPATH",
        "PYTHONDONTWRITEBYTECODE",
        "DEBPUTY_TEST_PLUGIN_LOCATION",
    ]:
        _info(f" {envname}={env[envname]}")

    with TemporaryDirectory(dir=tmpdir_root) as tmpdir:
        cmd = [
            "py.test",
            "-vvvvv" if context.parsed_args.debug_mode else "-v",
            "--config-file=/dev/null",
            f"--rootdir={cwd if cwd is not None else '.'}",
            "-o",
            f"cache_dir={tmpdir}",
        ]
        cmd.extend(test_paths)

        _info(f"Test Command: {escape_shell(*cmd)}")
        try:
            subprocess.check_call(
                cmd,
                stdin=subprocess.DEVNULL,
                env=env,
                cwd=cwd,
            )
        except subprocess.CalledProcessError:
            if on_error_return is None:
                _error("The tests were not successful.")
            return on_error_return
    # Previously a hard-coded True was returned here, silently ignoring the
    # caller's on_success_return. The default is True, so existing callers
    # observe the same result.
    return on_success_return
_run_tests_for_plugin(context: CommandContext) -> None: + target_plugin = context.parsed_args.target_plugin + if not os.path.isfile(target_plugin): + _error( + f'The value "{target_plugin}" must be a file. It should be the JSON descriptor of' + f" the plugin." + ) + try: + plugin_metadata = find_json_plugin( + context.plugin_search_dirs, + target_plugin, + ) + except PluginNotFoundError as e: + _error(e.message) + + tests = find_tests_for_plugin(plugin_metadata) + + if not tests: + if context.parsed_args.require_tests: + plugin_name = plugin_metadata.plugin_name + plugin_dir = os.path.dirname(plugin_metadata.plugin_path) + + _error( + f"Cannot find any tests for {plugin_name}: Expected them to be in " + f' "{plugin_dir}". Use --no-require-tests to consider missing tests' + " a non-error." + ) + _info( + f"No tests found for {plugin_metadata.plugin_name}. Use --require-tests to turn" + " this into an error." + ) + return + + if not shutil.which("py.test"): + _error( + f"Cannot run the tests for {plugin_metadata.plugin_name}: This feature requires py.test" + f" (apt-get install python3-pytest)" + ) + _run_tests(context, tests, cwd="/") + + +@internal_commands.register_subcommand( + "dh-integration-generate-debs", + help_description="[Internal command] Generate .deb/.udebs packages from debian/<pkg> (Not stable API)", + requested_plugins_only=True, + argparser=[ + _add_packages_args, + add_arg( + "--integration-mode", + dest="integration_mode", + default=None, + choices=["rrr"], + ), + add_arg( + "output", + metavar="output", + help="Where to place the resulting packages. 
Should be a directory", + ), + # Added for "help only" - you cannot trigger this option in practice + add_arg( + "--", + metavar="UPSTREAM_ARGS", + action="extend", + nargs="+", + dest="unused", + ), + ], +) +def _dh_integration_generate_debs(context: CommandContext) -> None: + integrated_with_debhelper() + parsed_args = context.parsed_args + is_dh_rrr_only_mode = parsed_args.integration_mode == "rrr" + if is_dh_rrr_only_mode: + problematic_plugins = list(context.requested_plugins()) + problematic_plugins.extend(context.required_plugins()) + if problematic_plugins: + plugin_names = ", ".join(problematic_plugins) + _error( + f"Plugins are not supported in the zz-debputy-rrr sequence. Detected plugins: {plugin_names}" + ) + + plugins = context.load_plugins().plugin_data + for plugin in plugins.values(): + _info(f"Loaded plugin {plugin.plugin_name}") + manifest = context.parse_manifest() + + package_data_table = manifest.perform_installations( + enable_manifest_installation_feature=not is_dh_rrr_only_mode + ) + source_fs = FSROOverlay.create_root_dir("..", ".") + source_version = manifest.source_version() + is_native = "-" not in source_version + + if not is_dh_rrr_only_mode: + for dctrl_bin in manifest.active_packages: + package = dctrl_bin.name + dctrl_data = package_data_table[package] + fs_root = dctrl_data.fs_root + package_metadata_context = dctrl_data.package_metadata_context + + assert dctrl_bin.should_be_acted_on + + detect_systemd_user_service_files(dctrl_bin, fs_root) + usr_local_transformation(dctrl_bin, fs_root) + handle_perl_code( + dctrl_bin, + manifest.dpkg_architecture_variables, + fs_root, + dctrl_data.substvars, + ) + if "nostrip" not in manifest.build_env.deb_build_options: + dbgsym_ids = relocate_dwarves_into_dbgsym_packages( + dctrl_bin, + fs_root, + dctrl_data.dbgsym_info.dbgsym_fs_root, + ) + dctrl_data.dbgsym_info.dbgsym_ids = dbgsym_ids + + fixup_debian_changelog_and_news_file( + dctrl_bin, + fs_root, + is_native, + manifest.build_env, + ) + 
if not is_native: + install_upstream_changelog( + dctrl_bin, + fs_root, + source_fs, + ) + run_package_processors(manifest, package_metadata_context, fs_root) + + cross_package_control_files(package_data_table, manifest) + for binary_data in package_data_table: + if not binary_data.binary_package.should_be_acted_on: + continue + # Ensure all fs's are read-only before we enable cross package checks. + # This ensures that no metadata detector will never see a read-write FS + cast("FSRootDir", binary_data.fs_root).is_read_write = False + + package_data_table.enable_cross_package_checks = True + assemble_debs( + context, + manifest, + package_data_table, + is_dh_rrr_only_mode, + ) + + +PackagingFileInfo = TypedDict( + "PackagingFileInfo", + { + "path": str, + "binary-package": NotRequired[str], + "install-path": NotRequired[str], + "install-pattern": NotRequired[str], + "file-categories": NotRequired[List[str]], + "config-features": NotRequired[List[str]], + "likely-generated-from": NotRequired[List[str]], + "related-tools": NotRequired[List[str]], + "documentation-uris": NotRequired[List[str]], + "debputy-cmd-templates": NotRequired[List[List[str]]], + "generates": NotRequired[str], + "generated-from": NotRequired[str], + }, +) + + +def _scan_debian_dir(debian_dir: VirtualPath) -> Iterator[VirtualPath]: + for p in debian_dir.iterdir: + yield p + if p.is_dir and p.path in ("debian/source", "debian/tests"): + yield from p.iterdir + + +_POST_FORMATTING_REWRITE = { + "period-to-underscore": lambda n: n.replace(".", "_"), +} + + +def _fake_PPFClassSpec( + debputy_plugin_metadata: DebputyPluginMetadata, + stem: str, + doc_uris: Sequence[str], + install_pattern: Optional[str], + *, + default_priority: Optional[int] = None, + packageless_is_fallback_for_all_packages: bool = False, + post_formatting_rewrite: Optional[str] = None, + bug_950723: bool = False, +) -> PackagerProvidedFileClassSpec: + if install_pattern is None: + install_pattern = "not-a-real-ppf" + if 
post_formatting_rewrite is not None: + formatting_hook = _POST_FORMATTING_REWRITE[post_formatting_rewrite] + else: + formatting_hook = None + return PackagerProvidedFileClassSpec( + debputy_plugin_metadata, + stem, + install_pattern, + allow_architecture_segment=True, + allow_name_segment=True, + default_priority=default_priority, + default_mode=0o644, + post_formatting_rewrite=formatting_hook, + packageless_is_fallback_for_all_packages=packageless_is_fallback_for_all_packages, + reservation_only=False, + formatting_callback=None, + bug_950723=bug_950723, + reference_documentation=packager_provided_file_reference_documentation( + format_documentation_uris=doc_uris, + ), + ) + + +def _relevant_dh_compat_rules( + compat_level: Optional[int], + info: KnownPackagingFileInfo, +) -> Iterable[InstallPatternDHCompatRule]: + if compat_level is None: + return + dh_compat_rules = info.get("dh_compat_rules") + if not dh_compat_rules: + return + for dh_compat_rule in dh_compat_rules: + rule_compat_level = dh_compat_rule.get("starting_with_compat_level") + if rule_compat_level is not None and compat_level < rule_compat_level: + continue + yield dh_compat_rule + + +def _kpf_install_pattern( + compat_level: Optional[int], + ppkpf: PluginProvidedKnownPackagingFile, +) -> Optional[str]: + for compat_rule in _relevant_dh_compat_rules(compat_level, ppkpf.info): + install_pattern = compat_rule.get("install_pattern") + if install_pattern is not None: + return install_pattern + return ppkpf.info.get("install_pattern") + + +def _resolve_debhelper_config_files( + debian_dir: VirtualPath, + binary_packages: Mapping[str, BinaryPackage], + debputy_plugin_metadata: DebputyPluginMetadata, + dh_ppf_docs: Dict[str, PluginProvidedKnownPackagingFile], + dh_rules_addons: Iterable[str], + dh_compat_level: int, +) -> Tuple[List[PackagerProvidedFile], Optional[object], int]: + dh_ppfs = {} + commands, exit_code = _relevant_dh_commands(dh_rules_addons) + dh_commands = set(commands) + + cmd = 
["dh_assistant", "list-guessed-dh-config-files"] + if dh_rules_addons: + addons = ",".join(dh_rules_addons) + cmd.append(f"--with={addons}") + try: + output = subprocess.check_output( + cmd, + stderr=subprocess.DEVNULL, + ) + except (subprocess.CalledProcessError, FileNotFoundError) as e: + config_files = [] + issues = None + if isinstance(e, subprocess.CalledProcessError): + exit_code = e.returncode + else: + exit_code = 127 + else: + result = json.loads(output) + config_files: List[Union[Mapping[str, Any], object]] = result.get( + "config-files", [] + ) + issues = result.get("issues") + for config_file in config_files: + if not isinstance(config_file, dict): + continue + if config_file.get("file-type") != "pkgfile": + continue + stem = config_file.get("pkgfile") + if stem is None: + continue + internal = config_file.get("internal") + if isinstance(internal, dict): + bug_950723 = internal.get("bug#950723", False) is True + else: + bug_950723 = False + commands = config_file.get("commands") + documentation_uris = [] + related_tools = [] + seen_commands = set() + seen_docs = set() + ppkpf = dh_ppf_docs.get(stem) + if ppkpf: + dh_cmds = ppkpf.info.get("debhelper_commands") + doc_uris = ppkpf.info.get("documentation_uris") + default_priority = ppkpf.info.get("default_priority") + if doc_uris is not None: + seen_docs.update(doc_uris) + documentation_uris.extend(doc_uris) + if dh_cmds is not None: + seen_commands.update(dh_cmds) + related_tools.extend(dh_cmds) + install_pattern = _kpf_install_pattern(dh_compat_level, ppkpf) + post_formatting_rewrite = ppkpf.info.get("post_formatting_rewrite") + packageless_is_fallback_for_all_packages = ppkpf.info.get( + "packageless_is_fallback_for_all_packages", + False, + ) + else: + install_pattern = None + default_priority = None + post_formatting_rewrite = None + packageless_is_fallback_for_all_packages = False + for command in commands: + if isinstance(command, dict): + command_name = command.get("command") + if 
isinstance(command_name, str) and command_name: + if command_name not in seen_commands: + related_tools.append(command_name) + seen_commands.add(command_name) + manpage = f"man:{command_name}(1)" + if manpage not in seen_docs: + documentation_uris.append(manpage) + seen_docs.add(manpage) + dh_ppfs[stem] = _fake_PPFClassSpec( + debputy_plugin_metadata, + stem, + documentation_uris, + install_pattern, + default_priority=default_priority, + post_formatting_rewrite=post_formatting_rewrite, + packageless_is_fallback_for_all_packages=packageless_is_fallback_for_all_packages, + bug_950723=bug_950723, + ) + for ppkpf in dh_ppf_docs.values(): + stem = ppkpf.detection_value + if stem in dh_ppfs: + continue + + default_priority = ppkpf.info.get("default_priority") + commands = ppkpf.info.get("debhelper_commands") + install_pattern = _kpf_install_pattern(dh_compat_level, ppkpf) + post_formatting_rewrite = ppkpf.info.get("post_formatting_rewrite") + packageless_is_fallback_for_all_packages = ppkpf.info.get( + "packageless_is_fallback_for_all_packages", + False, + ) + if commands and not any(c in dh_commands for c in commands): + continue + dh_ppfs[stem] = _fake_PPFClassSpec( + debputy_plugin_metadata, + stem, + ppkpf.info.get("documentation_uris"), + install_pattern, + default_priority=default_priority, + post_formatting_rewrite=post_formatting_rewrite, + packageless_is_fallback_for_all_packages=packageless_is_fallback_for_all_packages, + ) + dh_ppfs = list( + flatten_ppfs( + detect_all_packager_provided_files( + dh_ppfs, + debian_dir, + binary_packages, + allow_fuzzy_matches=True, + ) + ) + ) + return dh_ppfs, issues, exit_code + + +def _merge_list( + existing_table: Dict[str, Any], + key: str, + new_data: Optional[List[str]], +) -> None: + if not new_data: + return + existing_values = existing_table.get(key, []) + if isinstance(existing_values, tuple): + existing_values = list(existing_values) + assert isinstance(existing_values, list) + seen = set(existing_values) + 
existing_values.extend(x for x in new_data if x not in seen) + existing_table[key] = existing_values + + +def _merge_ppfs( + identified: List[PackagingFileInfo], + seen_paths: Set[str], + ppfs: List[PackagerProvidedFile], + context: Mapping[str, PluginProvidedKnownPackagingFile], + dh_compat_level: Optional[int], +) -> None: + for ppf in ppfs: + key = ppf.path.path + ref_doc = ppf.definition.reference_documentation + documentation_uris = ( + ref_doc.format_documentation_uris if ref_doc is not None else None + ) + + if not ppf.definition.installed_as_format.startswith("not-a-real-ppf"): + try: + parts = ppf.compute_dest() + except RuntimeError: + dest = None + else: + dest = "/".join(parts).lstrip(".") + else: + dest = None + seen_paths.add(key) + details: PackagingFileInfo = { + "path": key, + "binary-package": ppf.package_name, + } + if ppf.fuzzy_match and key.endswith(".in"): + _merge_list(details, "file-categories", ["generic-template"]) + details["generates"] = key[:-3] + elif assume_not_none(ppf.path.parent_dir).get(ppf.path.name + ".in"): + _merge_list(details, "file-categories", ["generated"]) + details["generated-from"] = key + ".in" + if dest is not None: + details["install-path"] = dest + identified.append(details) + + extra_details = context.get(ppf.definition.stem) + if extra_details is not None: + _add_known_packaging_data(details, extra_details, dh_compat_level) + + _merge_list(details, "documentation-uris", documentation_uris) + + +def _is_debputy_package(context: CommandContext, dh_rules_addons: Set[str]) -> bool: + drules = context.debian_dir.get("rules") + sequences = set() + source_package = context.source_package() + if drules is not None and not drules.is_dir: + parse_drules_for_addons(drules, dh_rules_addons) + extract_dh_addons_from_control(source_package.fields, sequences) + sequences.update(dh_rules_addons) + return ( + "debputy" in sequences or "zz-debputy" in sequences or "zz_debputy" in sequences + ) + + +def _extract_dh_compat_level() 
-> Tuple[Optional[int], int]: + try: + output = subprocess.check_output( + ["dh_assistant", "active-compat-level"], + stderr=subprocess.DEVNULL, + ) + except (FileNotFoundError, subprocess.CalledProcessError) as e: + exit_code = 127 + if isinstance(e, subprocess.CalledProcessError): + exit_code = e.returncode + return None, exit_code + else: + data = json.loads(output) + active_compat_level = data.get("active-compat-level") + exit_code = 0 + if not isinstance(active_compat_level, int) or active_compat_level < 1: + active_compat_level = None + exit_code = 255 + return active_compat_level, exit_code + + +def _relevant_dh_commands(dh_rules_addons: Iterable[str]) -> Tuple[List[str], int]: + cmd = ["dh_assistant", "list-commands", "--output-format=json"] + if dh_rules_addons: + addons = ",".join(dh_rules_addons) + cmd.append(f"--with={addons}") + try: + output = subprocess.check_output( + cmd, + stderr=subprocess.DEVNULL, + ) + except (FileNotFoundError, subprocess.CalledProcessError) as e: + exit_code = 127 + if isinstance(e, subprocess.CalledProcessError): + exit_code = e.returncode + return [], exit_code + else: + data = json.loads(output) + commands_json = data.get("commands") + commands = [] + for command in commands_json: + if isinstance(command, dict): + command_name = command.get("command") + if isinstance(command_name, str) and command_name: + commands.append(command_name) + return commands, 0 + + +@tool_support_commands.register_subcommand( + "supports-tool-command", + help_description="Test where a given tool-support command exists", + argparser=add_arg( + "test_command", + metavar="name", + default=None, + help="The name of the command", + ), +) +def _supports_tool_command(context: CommandContext) -> None: + command_name = context.parsed_args.test_command + if tool_support_commands.has_command(command_name): + sys.exit(0) + else: + sys.exit(2) + + +@tool_support_commands.register_subcommand( + "export-reference-data", + help_description="Export reference 
def _add_known_packaging_data(
    details: PackagingFileInfo,
    plugin_data: PluginProvidedKnownPackagingFile,
    dh_compat_level: Optional[int],
) -> None:
    """Enrich ``details`` with what a plugin knows about a packaging file.

    The config features, install pattern, file categories, documentation URIs
    and debputy command templates from ``plugin_data.info`` are merged into
    ``details``. List values are merged via ``_merge_list``; an already present
    ``install-pattern`` is kept.

    :param details: The annotation record to update in place.
    :param plugin_data: The plugin provided data for the packaging file.
    :param dh_compat_level: The active debhelper compat level, or None when it
      is unknown or not applicable. With None, no compat rules are consulted
      for additional config features.
    """
    install_pattern = _kpf_install_pattern(
        dh_compat_level,
        plugin_data,
    )
    config_features = plugin_data.info.get("config_features")
    if config_features:
        # An unknown compat level is passed on as 0 for the expansion.
        config_features = expand_known_packaging_config_features(
            dh_compat_level or 0,
            config_features,
        )
        _merge_list(details, "config-features", config_features)

    if dh_compat_level is not None:
        # Collect the features that the applicable compat rules add on top.
        extra_config_features = []
        for dh_compat_rule in _relevant_dh_compat_rules(
            dh_compat_level, plugin_data.info
        ):
            cf = dh_compat_rule.get("add_config_features")
            if cf:
                extra_config_features.extend(cf)
        if extra_config_features:
            extra_config_features = expand_known_packaging_config_features(
                dh_compat_level,
                extra_config_features,
            )
            _merge_list(details, "config-features", extra_config_features)
    if "install-pattern" not in details and install_pattern is not None:
        details["install-pattern"] = install_pattern
    # mk is the key used in the plugin data, ok the key used in the output.
    for mk, ok in [
        ("file_categories", "file-categories"),
        ("documentation_uris", "documentation-uris"),
        ("debputy_cmd_templates", "debputy-cmd-templates"),
    ]:
        value = plugin_data.info.get(mk)
        if value and ok == "debputy-cmd-templates":
            # Each template is an argument list; it is emitted as one escaped string.
            value = [escape_shell(*c) for c in value]
        _merge_list(details, ok, value)
Please get in touch if you want to use it" + " or want additional features.", +) +def _annotate_debian_directory(context: CommandContext) -> None: + # Validates that we are run from a debian directory as a side effect + binary_packages = context.binary_packages() + feature_set = context.load_plugins() + known_packaging_files = feature_set.known_packaging_files + debputy_plugin_metadata = plugin_metadata_for_debputys_own_plugin() + + reference_data_set_names = [ + "config-features", + "file-categories", + ] + for n in reference_data_set_names: + assert n in REFERENCE_DATA_TABLE + + annotated: List[PackagingFileInfo] = [] + seen_paths = set() + + drules_sequences = set() + is_debputy_package = _is_debputy_package(context, drules_sequences) + dh_compat_level, dh_assistant_exit_code = _extract_dh_compat_level() + dh_issues = [] + + static_packaging_files = { + kpf.detection_value: kpf + for kpf in known_packaging_files.values() + if kpf.detection_method == "path" + } + dh_pkgfile_docs = { + kpf.detection_value: kpf + for kpf in known_packaging_files.values() + if kpf.detection_method == "dh.pkgfile" + } + + if is_debputy_package: + all_debputy_ppfs = list( + flatten_ppfs( + detect_all_packager_provided_files( + feature_set.packager_provided_files, + context.debian_dir, + binary_packages, + allow_fuzzy_matches=True, + ) + ) + ) + else: + all_debputy_ppfs = [] + + if dh_compat_level is not None: + ( + all_dh_ppfs, + dh_issues, + dh_assistant_exit_code, + ) = _resolve_debhelper_config_files( + context.debian_dir, + binary_packages, + debputy_plugin_metadata, + dh_pkgfile_docs, + drules_sequences, + dh_compat_level, + ) + + else: + all_dh_ppfs = [] + + for ppf in all_debputy_ppfs: + key = ppf.path.path + ref_doc = ppf.definition.reference_documentation + documentation_uris = ( + ref_doc.format_documentation_uris if ref_doc is not None else None + ) + details: PackagingFileInfo = { + "path": key, + "debputy-cmd-templates": [ + ["debputy", "plugin", "show", "p-p-f", 
ppf.definition.stem] + ], + } + if ppf.fuzzy_match and key.endswith(".in"): + _merge_list(details, "file-categories", ["generic-template"]) + details["generates"] = key[:-3] + elif assume_not_none(ppf.path.parent_dir).get(ppf.path.name + ".in"): + _merge_list(details, "file-categories", ["generated"]) + details["generated-from"] = key + ".in" + seen_paths.add(key) + annotated.append(details) + static_details = static_packaging_files.get(key) + if static_details is not None: + # debhelper compat rules does not apply to debputy files + _add_known_packaging_data(details, static_details, None) + if documentation_uris: + details["documentation-uris"] = list(documentation_uris) + + _merge_ppfs(annotated, seen_paths, all_dh_ppfs, dh_pkgfile_docs, dh_compat_level) + + for virtual_path in _scan_debian_dir(context.debian_dir): + key = virtual_path.path + if key in seen_paths: + continue + if virtual_path.is_symlink: + try: + st = os.stat(virtual_path.fs_path) + except FileNotFoundError: + continue + else: + if not stat.S_ISREG(st.st_mode): + continue + elif not virtual_path.is_file: + continue + + static_match = static_packaging_files.get(virtual_path.path) + if static_match is not None: + details: PackagingFileInfo = { + "path": key, + } + annotated.append(details) + if assume_not_none(virtual_path.parent_dir).get(virtual_path.name + ".in"): + details["generated-from"] = key + ".in" + _merge_list(details, "file-categories", ["generated"]) + _add_known_packaging_data(details, static_match, dh_compat_level) + + data = { + "result": annotated, + "reference-datasets": reference_data_set_names, + } + if dh_issues is not None or dh_assistant_exit_code != 0: + data["issues"] = [ + { + "source": "dh_assistant", + "exit-code": dh_assistant_exit_code, + "issue-data": dh_issues, + } + ] + _json_output(data) + + +def _json_output(data: Any) -> None: + format_options = {} + if sys.stdout.isatty(): + format_options = { + "indent": 4, + # sort_keys might be tempting but generally insert 
@ROOT_COMMAND.register_subcommand(
    "migrate-from-dh",
    help_description='Generate/update manifest from a "dh $@" using package',
    argparser=[
        add_arg(
            "--acceptable-migration-issues",
            dest="acceptable_migration_issues",
            action="append",
            type=str,
            default=[],
            help="Continue the migration even if this/these issues are detected."
            " Can be set to ALL (in all upper-case) to accept all issues",
        ),
        add_arg(
            "--migration-target",
            dest="migration_target",
            action="store",
            choices=MIGRATORS,
            type=str,
            default=None,
            # This help text used to be a verbatim copy of the one for
            # --acceptable-migration-issues, which described the wrong option.
            help="Explicitly select the migration target to migrate to",
        ),
        add_arg(
            "--no-act",
            "--no-apply-changes",
            dest="destructive",
            action="store_false",
            default=None,
            help="Do not perform changes. Existing manifest will not be overridden",
        ),
        add_arg(
            "--apply-changes",
            dest="destructive",
            action="store_true",
            default=None,
            help="Perform changes. The debian/debputy.manifest will be updated in place if it exists",
        ),
    ],
)
def _migrate_from_dh(context: CommandContext) -> None:
    """Migrate a "dh $@" based package according to the command line options.

    The values given to ``--acceptable-migration-issues`` may be repeated and
    may each contain several comma separated issues; they are flattened into
    one set before being handed to ``migrate_from_dh``.

    :param context: Command context providing the parsed arguments and manifest.
    """
    parsed_args = context.parsed_args
    manifest = context.parse_manifest()
    acceptable_migration_issues = AcceptableMigrationIssues(
        frozenset(
            i for x in parsed_args.acceptable_migration_issues for i in x.split(",")
        )
    )
    migrate_from_dh(
        manifest,
        acceptable_migration_issues,
        # None when neither --apply-changes nor --no-act was given.
        parsed_args.destructive,
        parsed_args.migration_target,
        lambda p: context.parse_manifest(manifest_path=p),
    )
def main() -> None:
    """Entry point: parse the command line, run the command and report failures.

    Exceptions escaping the command are turned into error messages via
    ``_error_w_stack_trace`` / ``_error``. In debug mode, the original
    exception is re-raised instead so the raw stack trace is shown.
    """
    parsed_args = _setup_and_parse_args()
    plugin_search_dirs = [str(DEBPUTY_PLUGIN_ROOT_DIR)]
    try:
        cmd_arg = CommandArg(
            parsed_args,
            plugin_search_dirs,
        )
        ROOT_COMMAND(cmd_arg)
    except PluginInitializationError as e:
        _error_w_stack_trace(
            "Failed to load a plugin - full stack trace:",
            e.message,
            e,
            parsed_args.debug_mode,
            follow_warning=[
                "Please consider filing a bug against the plugin in question"
            ],
        )
    except UnhandledOrUnexpectedErrorFromPluginError as e:
        # Prefer the underlying cause for the printed trace when there is one.
        trace = e.__cause__ if e.__cause__ is not None else e
        # TODO: Reframe this as an internal error if `debputy` is the misbehaving plugin
        if isinstance(trace, SymlinkLoopError):
            _error_w_stack_trace(
                "Error in `debputy`:",
                e.message,
                trace,
                parsed_args.debug_mode,
                orig_exception=e,
                follow_warning=["Please consider filing a bug against `debputy`"],
            )
        else:
            _error_w_stack_trace(
                "A plugin misbehaved:",
                e.message,
                trace,
                parsed_args.debug_mode,
                orig_exception=e,
                follow_warning=[
                    "Please consider filing a bug against the plugin in question"
                ],
            )
    except PluginAPIViolationError as e:
        trace = e.__cause__ if e.__cause__ is not None else e
        # TODO: Reframe this as an internal error if `debputy` is the misbehaving plugin
        _error_w_stack_trace(
            "A plugin misbehaved:",
            e.message,
            trace,
            parsed_args.debug_mode,
            orig_exception=e,
            follow_warning=[
                "Please consider filing a bug against the plugin in question"
            ],
        )
    except DebputyRuntimeError as e:
        # These are reported with their message only; no stack trace is printed
        # unless debug mode is active.
        if parsed_args.debug_mode:
            _warn(
                "Re-raising original exception to show the full stack trace due to debug mode being active"
            )
            raise e
        _error(e.message)
    except AssertionError as e:
        _error_w_stack_trace(
            "Internal error in debputy",
            str(e),
            e,
            parsed_args.debug_mode,
            orig_exception=e,
            follow_warning=["Please file a bug against debputy with the full output."],
        )
    except subprocess.CalledProcessError as e:
        cmd = escape_shell(*e.cmd) if isinstance(e.cmd, list) else str(e.cmd)
        _error_w_stack_trace(
            f"The command << {cmd} >> failed and the code did not explicitly handle that exception.",
            str(e),
            e,
            parsed_args.debug_mode,
            orig_exception=e,
            follow_warning=[
                "The output above this error and the stacktrace may provide context to why the command failed.",
                "Please file a bug against debputy with the full output.",
            ],
        )
    except Exception as e:
        _error_w_stack_trace(
            "Unhandled exception (Re-run with --debug to see the raw stack trace)",
            str(e),
            e,
            parsed_args.debug_mode,
            orig_exception=e,
            follow_warning=["Please file a bug against debputy with the full output."],
        )
class CommandContext:
    """State handed to a subcommand handler for a single invocation.

    Wraps the parsed command line arguments and lazily resolves (and caches)
    the more expensive pieces: the substitution engine, the parsed
    `debian/control` data, the plugin feature set and the manifest parser.
    """

    def __init__(
        self,
        parsed_args: argparse.Namespace,
        plugin_search_dirs: Sequence[str],
        require_substitution: bool = True,
        requested_plugins_only: bool = False,
    ) -> None:
        """Create the context.

        :param parsed_args: The namespace produced by argparse.
        :param plugin_search_dirs: Directories passed on to the plugin loader.
        :param require_substitution: Whether the command needs a real
          substitution engine (see `_create_substitution`).
        :param requested_plugins_only: When true, `load_plugins` restricts the
          load to the plugins returned by `requested_plugins`.
        """
        self.parsed_args = parsed_args
        self.plugin_search_dirs = plugin_search_dirs
        self._require_substitution = require_substitution
        self._requested_plugins_only = requested_plugins_only
        self._debputy_plugin_feature_set: PluginProvidedFeatureSet = (
            PluginProvidedFeatureSet()
        )
        self._debian_dir = FSROOverlay.create_root_dir("debian", "debian")
        # The attributes below are caches that are filled on first use.
        self._mtime: Optional[int] = None
        self._source_variables: Optional[Mapping[str, str]] = None
        self._substitution: Optional[Substitution] = None
        self._requested_plugins: Optional[Sequence[str]] = None
        self._plugins_loaded = False
        self._dctrl_data: Optional[
            Tuple[
                DpkgArchitectureBuildProcessValuesTable,
                DpkgArchTable,
                DebBuildOptionsAndProfiles,
                "SourcePackage",
                Mapping[str, "BinaryPackage"],
            ]
        ] = None

    @property
    def debian_dir(self) -> VirtualPath:
        """The `debian` directory as a virtual path."""
        return self._debian_dir

    @property
    def mtime(self) -> int:
        """The value of `resolve_source_date_epoch`, resolved once and cached."""
        if self._mtime is None:
            self._mtime = resolve_source_date_epoch(
                None,
                substitution=self.substitution,
            )
        return self._mtime

    def source_package(self) -> SourcePackage:
        """Return the source package parsed from `debian/control`."""
        _a, _b, _c, source, _d = self._parse_dctrl()
        return source

    def binary_packages(self) -> Mapping[str, "BinaryPackage"]:
        """Return the binary package table parsed from `debian/control`."""
        _a, _b, _c, _source, binary_package_table = self._parse_dctrl()
        return binary_package_table

    def requested_plugins(self) -> Sequence[str]:
        """Return the plugins requested via `Build-Depends` (cached)."""
        if self._requested_plugins is None:
            self._requested_plugins = self._resolve_requested_plugins()
        return self._requested_plugins

    def required_plugins(self) -> Set[str]:
        """Return the plugins listed in the `required_plugins` argument.

        A missing or empty attribute yields an empty set. The lookup uses a
        default so it matches the `debug_mode` lookup in `load_plugins` and
        does not fail for argument namespaces without the attribute.
        """
        return set(getattr(self.parsed_args, "required_plugins", None) or [])

    @property
    def deb_build_options_and_profiles(self) -> "DebBuildOptionsAndProfiles":
        """The build environment object used when parsing `debian/control`."""
        _a, _b, deb_build_options_and_profiles, _c, _d = self._parse_dctrl()
        return deb_build_options_and_profiles

    @property
    def deb_build_options(self) -> Mapping[str, Optional[str]]:
        """Shorthand for `deb_build_options_and_profiles.deb_build_options`."""
        return self.deb_build_options_and_profiles.deb_build_options

    def _create_substitution(
        self,
        parsed_args: argparse.Namespace,
        plugin_feature_set: PluginProvidedFeatureSet,
        debian_dir: VirtualPath,
    ) -> Substitution:
        """Create the substitution engine for this command.

        Returns `NULL_SUBSTITUTION` only when the command does not require
        substitution and the `substitution` argument is explicitly `False`.
        Asking for no substitution on a command that requires it is reported
        via `_error`.
        """
        requested_subst = self._require_substitution
        if hasattr(parsed_args, "substitution"):
            requested_subst = parsed_args.substitution
        if requested_subst is False and self._require_substitution:
            _error(f"--no-substitution cannot be used with {parsed_args.command}")
        if self._require_substitution or requested_subst is not False:
            variable_context = VariableContext(debian_dir)
            return SubstitutionImpl(
                plugin_feature_set=plugin_feature_set,
                unresolvable_substitutions=frozenset(["PACKAGE"]),
                variable_context=variable_context,
            )
        return NULL_SUBSTITUTION

    def load_plugins(self) -> PluginProvidedFeatureSet:
        """Load the plugin features on first call and return the feature set."""
        if not self._plugins_loaded:
            requested_plugins = None
            required_plugins = self.required_plugins()
            if self._requested_plugins_only:
                requested_plugins = self.requested_plugins()
            debug_mode = getattr(self.parsed_args, "debug_mode", False)
            load_plugin_features(
                self.plugin_search_dirs,
                self.substitution,
                requested_plugins_only=requested_plugins,
                required_plugins=required_plugins,
                plugin_feature_set=self._debputy_plugin_feature_set,
                debug_mode=debug_mode,
            )
            self._plugins_loaded = True
        return self._debputy_plugin_feature_set

    @staticmethod
    def _plugin_from_dependency_field(dep_field: str) -> Iterable[str]:
        """Yield plugin names for `debputy-plugin-*` entries in a dependency field.

        Only the first alternative of each comma separated clause is
        considered.
        """
        package_prefix = "debputy-plugin-"
        for dep_clause in (d.strip() for d in dep_field.split(",")):
            dep = dep_clause.split("|")[0].strip()
            if not dep.startswith(package_prefix):
                continue
            m = PKGNAME_REGEX.search(dep)
            assert m
            package_name = m.group(0)
            plugin_name = package_name[len(package_prefix) :]
            yield plugin_name

    def _resolve_requested_plugins(self) -> Sequence[str]:
        """Resolve the plugins requested via the `Build-Depends` field.

        :raises DebputyRuntimeError: If a plugin dependency is listed in
          `Build-Depends-Arch` or `Build-Depends-Indep`.
        """
        _a, _b, _c, source_package, _d = self._parse_dctrl()
        bd = source_package.fields.get("Build-Depends", "")
        plugins = list(self._plugin_from_dependency_field(bd))
        for field_name in ("Build-Depends-Arch", "Build-Depends-Indep"):
            f = source_package.fields.get(field_name)
            if not f:
                continue
            # Any plugin found in these fields is an error; the loop body
            # raises on the first one.
            for plugin in self._plugin_from_dependency_field(f):
                raise DebputyRuntimeError(
                    f"Cannot load plugins via {field_name}:"
                    f" Please move debputy-plugin-{plugin} dependency to Build-Depends."
                )

        return plugins

    @property
    def substitution(self) -> Substitution:
        """The substitution engine, created on first access."""
        if self._substitution is None:
            self._substitution = self._create_substitution(
                self.parsed_args,
                self._debputy_plugin_feature_set,
                self.debian_dir,
            )
        return self._substitution

    def _parse_dctrl(
        self,
    ) -> Tuple[
        DpkgArchitectureBuildProcessValuesTable,
        DpkgArchTable,
        DebBuildOptionsAndProfiles,
        "SourcePackage",
        Mapping[str, "BinaryPackage"],
    ]:
        """Parse `debian/control` once and return the cached result.

        The result is the tuple (architecture variables, architecture query
        table, build environment, source package, binary package table). A
        missing `debian/control` is reported via `_error`.
        """
        if self._dctrl_data is None:
            build_env = DebBuildOptionsAndProfiles.instance()
            dpkg_architecture_variables = dpkg_architecture_table()
            dpkg_arch_query_table = DpkgArchTable.load_arch_table()

            packages: Union[Set[str], FrozenSet[str]] = frozenset()
            if hasattr(self.parsed_args, "packages"):
                packages = self.parsed_args.packages

            try:
                debian_control = self.debian_dir.get("control")
                if debian_control is None:
                    raise FileNotFoundError(
                        errno.ENOENT,
                        os.strerror(errno.ENOENT),
                        os.path.join(self.debian_dir.fs_path, "control"),
                    )
                source_package, binary_packages = parse_source_debian_control(
                    debian_control,
                    packages,  # -p/--package
                    set(),  # -N/--no-package
                    False,  # -i
                    False,  # -a
                    dpkg_architecture_variables=dpkg_architecture_variables,
                    dpkg_arch_query_table=dpkg_arch_query_table,
                    build_env=build_env,
                )
                assert packages <= binary_packages.keys()
            except FileNotFoundError:
                # NOTE(review): the code below relies on `_error` not
                # returning (it appears to terminate the program) - confirm.
                _error(
                    "This subcommand must be run from a source package root; expecting debian/control to exist."
                )

            self._dctrl_data = (
                dpkg_architecture_variables,
                dpkg_arch_query_table,
                build_env,
                source_package,
                binary_packages,
            )

        return self._dctrl_data

    @property
    def has_dctrl_file(self) -> bool:
        """Whether the `debian` directory contains a `control` entry."""
        debian_control = self.debian_dir.get("control")
        return debian_control is not None

    def manifest_parser(
        self,
        *,
        manifest_path: Optional[str] = None,
    ) -> YAMLManifestParser:
        """Create a manifest parser.

        :param manifest_path: Path of the manifest. The `debputy_manifest`
          argument takes precedence when set; the fallback is
          `debputy.manifest` inside the `debian` directory.
        """
        substitution = self.substitution

        (
            dpkg_architecture_variables,
            dpkg_arch_query_table,
            build_env,
            source_package,
            binary_packages,
        ) = self._parse_dctrl()

        if self.parsed_args.debputy_manifest is not None:
            manifest_path = self.parsed_args.debputy_manifest
        if manifest_path is None:
            manifest_path = os.path.join(self.debian_dir.fs_path, "debputy.manifest")
        return YAMLManifestParser(
            manifest_path,
            source_package,
            binary_packages,
            substitution,
            dpkg_architecture_variables,
            dpkg_arch_query_table,
            build_env,
            self.load_plugins(),
            debian_dir=self.debian_dir,
        )

    def parse_manifest(
        self,
        *,
        manifest_path: Optional[str] = None,
    ) -> HighLevelManifest:
        """Parse the manifest, or build an empty one when it does not exist.

        A manifest given via the `debputy_manifest` argument must exist;
        otherwise the problem is reported via `_error`. As a side effect, the
        `SOURCE_DATE_EPOCH` environment variable is set from the substitution.

        :param manifest_path: Path of the manifest (see `manifest_parser` for
          the precedence rules).
        """
        substitution = self.substitution
        manifest_required = False

        # Resolve debian/control up front, as before, so a missing control
        # file is reported first. The values themselves are not needed here;
        # `manifest_parser` reads them from the cache.
        self._parse_dctrl()

        if self.parsed_args.debputy_manifest is not None:
            manifest_path = self.parsed_args.debputy_manifest
            manifest_required = True
        if manifest_path is None:
            manifest_path = os.path.join(self.debian_dir.fs_path, "debputy.manifest")
        parser = self.manifest_parser(manifest_path=manifest_path)

        os.environ["SOURCE_DATE_EPOCH"] = substitution.substitute(
            "{{SOURCE_DATE_EPOCH}}",
            "Internal resolution",
        )
        if os.path.isfile(manifest_path):
            return parser.parse_manifest()
        if manifest_required:
            _error(f'The path "{manifest_path}" is not a file!')
        return parser.build_manifest()
class SubcommandBase(CommandBase):
    """A command that can be attached to an argparse sub-parser collection.

    Stores the primary name, the aliases and the help text that are used when
    the command is added to a sub-parser.
    """

    __slots__ = ("name", "aliases", "help_description")

    def __init__(
        self,
        name: str,
        *,
        aliases: Sequence[str] = tuple(),
        help_description: Optional[str] = None,
    ) -> None:
        """Record the name, aliases and help text of the subcommand."""
        self.name = name
        self.aliases = aliases
        self.help_description = help_description

    def add_subcommand_to_subparser(
        self,
        subparser: "_SubParsersAction",
    ) -> argparse.ArgumentParser:
        """Register this command with `subparser` and configure its parser.

        :param subparser: The sub-parser collection to add the command to.
        :return: The argument parser created for this command.
        """
        parser_options = {
            "aliases": self.aliases,
            "help": self.help_description,
            "allow_abbrev": False,
        }
        command_parser = subparser.add_parser(self.name, **parser_options)
        # Let the concrete command add its own arguments.
        self.configure(command_parser)
        return command_parser
class DispatchingCommandMixin(CommandBase):
    """Helpers for commands that dispatch to registered subcommands.

    Implementations must provide `add_subcommand`; the other methods build on
    top of it.
    """

    __slots__ = ()

    def add_subcommand(self, subcommand: SubcommandBase) -> None:
        """Attach `subcommand` to this command (provided by the implementation)."""
        raise NotImplementedError

    def add_dispatching_subcommand(
        self,
        name: str,
        dest: str,
        *,
        aliases: Sequence[str] = tuple(),
        help_description: Optional[str] = None,
        metavar: str = "command",
        default_subcommand: Optional[str] = None,
    ) -> "DispatcherCommand":
        """Create a nested dispatcher, attach it and return it."""
        dispatcher = DispatcherCommand(
            name,
            dest,
            aliases=aliases,
            help_description=help_description,
            metavar=metavar,
            default_subcommand=default_subcommand,
        )
        self.add_subcommand(dispatcher)
        return dispatcher

    def register_subcommand(
        self,
        name: Union[str, Sequence[str]],
        *,
        help_description: Optional[str] = None,
        argparser: Optional[
            Union[ArgparserConfigurator, Sequence[ArgparserConfigurator]]
        ] = None,
        require_substitution: bool = True,
        requested_plugins_only: bool = False,
        log_only_to_stderr: bool = False,
    ) -> Callable[[CommandHandler], GenericSubCommand]:
        """Return a decorator that registers a handler as a subcommand.

        :param name: The command name, or a sequence whose first element is
          the name and whose remaining elements are aliases.
        :param argparser: A single configurator or a sequence of configurators
          that are applied in order to the argument parser of the command.
        :return: A decorator that replaces the handler with the registered
          `GenericSubCommand`.
        """
        if isinstance(name, str):
            cmd_name, aliases = name, []
        else:
            cmd_name, aliases = name[0], name[1:]

        if argparser is None or callable(argparser):
            configure_handler = argparser
        else:
            configurators = argparser

            def configure_handler(parser: argparse.ArgumentParser) -> None:
                # Apply every configurator to the same parser, in order.
                for configurator in configurators:
                    configurator(parser)

        def _register(func: CommandHandler) -> GenericSubCommand:
            registered = GenericSubCommand(
                cmd_name,
                func,
                aliases=aliases,
                help_description=help_description,
                require_substitution=require_substitution,
                requested_plugins_only=requested_plugins_only,
                log_only_to_stderr=log_only_to_stderr,
            )
            self.add_subcommand(registered)
            if configure_handler is not None:
                registered.configure_handler(configure_handler)

            return registered

        return _register
def flatten_ppfs(
    all_ppfs: Dict[str, "PerPackagePackagerProvidedResult"]
) -> Iterable["PackagerProvidedFile"]:
    """Yield every packager provided file from all per-package results.

    For each result, the `auto_installable` files come first, followed by the
    files of each `reserved_only` group.

    :param all_ppfs: The per-package results; only the values are used.
    """
    for per_package_result in all_ppfs.values():
        for ppf in per_package_result.auto_installable:
            yield ppf
        for reserved_group in per_package_result.reserved_only.values():
            for ppf in reserved_group:
                yield ppf
# Editor configuration snippets printed by `debputy lsp editor-config`.
#
# A value that is itself a key of the table is an alias for that entry
# (e.g. "emacs" -> "emacs+eglot"); the other values are the snippet text.
_EDITOR_SNIPPETS = {
    "emacs": "emacs+eglot",
    "emacs+eglot": textwrap.dedent(
        """\
        ;; `debputy lsp server` glue for emacs eglot (eglot is built-in these days)
        ;;
        ;; Add to ~/.emacs or ~/.emacs.d/init.el and then activate via `M-x eglot`.
        ;;
        ;; Requires: apt install elpa-dpkg-dev-el

        ;; Make emacs recognize debian/debputy.manifest as a YAML file
        (add-to-list 'auto-mode-alist '("/debian/debputy.manifest\\'" . yaml-mode))
        ;; Inform eglot about the debputy LSP
        (with-eval-after-load 'eglot
            (add-to-list 'eglot-server-programs
                         '(debian-control-mode . ("debputy" "lsp" "server")))
            (add-to-list 'eglot-server-programs
                         '(debian-changelog-mode . ("debputy" "lsp" "server")))
            (add-to-list 'eglot-server-programs
                         '(debian-copyright-mode . ("debputy" "lsp" "server")))
            ;; The debian/rules file uses the gmake mode.
            (add-to-list 'eglot-server-programs
                         '(makefile-gmake-mode . ("debputy" "lsp" "server")))
        )

        ;; Auto-start eglot for the relevant modes.
        (add-hook 'debian-control-mode-hook 'eglot-ensure)
        ;; NOTE: changelog disabled by default because for some reason
        ;;       this hook causes perceivable delay (several seconds) when
        ;;       opening the first changelog. It seems to be related to imenu.
        ;; (add-hook 'debian-changelog-mode-hook 'eglot-ensure)
        (add-hook 'debian-copyright-mode-hook 'eglot-ensure)
        (add-hook 'makefile-gmake-mode-hook 'eglot-ensure)
        """
    ),
    "vim": "vim+youcompleteme",
    "vim+youcompleteme": textwrap.dedent(
        """\
        # debputy lsp server glue for vim with vim-youcompleteme. Add to ~/.vimrc
        #
        # Requires: apt install vim-youcompleteme

        # Make vim recognize debputy.manifest as YAML file
        au BufNewFile,BufRead debputy.manifest setf yaml
        # Inform vim/ycm about the debputy LSP
        let g:ycm_language_server = [
          \\ { 'name': 'debputy',
          \\   'filetypes': [ 'debcontrol', 'debcopyright', 'debchangelog', 'make'],
          \\   'cmdline': [ 'debputy', 'lsp', 'server' ]
          \\ },
          \\ ]

        packadd! youcompleteme
        nmap <leader>d <plug>(YCMHover)
        """
    ),
}
# This handler used to be called `lsp_editor_glue` as well, which silently
# replaced the module-level name of the `editor-config` handler defined just
# before it. Registration happens through the decorator, so the subcommand
# itself is unaffected by the rename.
@lsp_command.register_subcommand(
    "features",
    help_description="Describe language ids and features",
)
def lsp_describe_features(_context: CommandContext) -> None:
    """Handle `debputy lsp features`: describe the supported LSP features.

    :param _context: The command context (unused).
    """
    try:
        # Imported only to verify that the optional dependencies are present.
        import lsprotocol
        import pygls
    except ImportError:
        _error(
            "This feature requires lsprotocol and pygls (apt-get install python3-lsprotocol python3-pygls)"
        )

    # Deferred until the dependency check above has passed.
    from debputy.lsp.lsp_features import describe_lsp_features

    describe_lsp_features()
class OutputStylingBase:
    """Plain (unstyled) output helper that writes to a text stream.

    This base implementation validates color and style names but applies no
    styling. Subclasses can override `colored`, `supports_colors` and
    `render_url` to add it.
    """

    def __init__(
        self,
        stream: IO[str],
        output_format: str,
        *,
        optimize_for_screen_reader: bool = False,
    ) -> None:
        """Create the helper.

        :param stream: The stream that `print` writes to.
        :param output_format: The output format; `print_list_table` emits CSV
          when this is "csv" and a text table otherwise.
        :param optimize_for_screen_reader: When true, purely visual formatting
          is suppressed and screen reader specific text is printed instead.
        """
        self.stream = stream
        self.output_format = output_format
        self.optimize_for_screen_reader = optimize_for_screen_reader
        self._color_support = None

    def colored(
        self,
        text: str,
        *,
        fg: Optional[str] = None,
        bg: Optional[str] = None,
        style: Optional[str] = None,
    ) -> str:
        """Validate the color/style names and return `text` unchanged.

        :raises ValueError: If a color or style name is not supported.
        """
        self._check_color(fg)
        self._check_color(bg)
        self._check_text_style(style)
        return text

    @property
    def supports_colors(self) -> bool:
        """Whether colored output is produced (never for the base class)."""
        return False

    def print_list_table(
        self,
        headers: Sequence[Union[str, Tuple[str, str]]],
        rows: Sequence[Sequence[str]],
    ) -> None:
        """Print `rows` as a table (or as CSV when the output format is "csv").

        :param headers: The column headers. Each entry is either the column
          name or a `(name, alignment)` pair where the alignment is a format
          specification alignment character; the default is `<`.
        :param rows: The table rows. May be empty, in which case only the
          header is printed.
        :raises ValueError: If the rows and headers disagree on the number of
          columns, or if no headers are given.
        """
        if rows:
            if any(len(r) != len(rows[0]) for r in rows):
                raise ValueError(
                    "Unbalanced table: All rows must have the same column count"
                )
            if len(rows[0]) != len(headers):
                raise ValueError(
                    "Unbalanced table: header list does not agree with row list on number of columns"
                )

        if not headers:
            raise ValueError("No headers provided!?")

        cadjust = {}
        header_names = []
        for c in headers:
            if isinstance(c, str):
                header_names.append(c)
            else:
                cname, adjust = c
                header_names.append(cname)
                cadjust[cname] = adjust

        if self.output_format == "csv":
            from csv import writer

            w = writer(self.stream)
            w.writerow(header_names)
            w.writerows(rows)
            return

        # `default=0` keeps an empty row list working: each column is then as
        # wide as its header (a bare `max()` over no rows raises ValueError).
        column_lengths = [
            max(len(h), max((len(r[i]) for r in rows), default=0))
            for i, h in enumerate(header_names)
        ]
        # divider => "+---+---+-...-+"
        divider = "+-" + "-+-".join("-" * x for x in column_lengths) + "-+"
        # row_format => '| {:<10} | {:<8} | ... |' where the numbers are the column lengths
        row_format_inner = " | ".join(
            f"{{CELL_COLOR}}{{:{cadjust.get(cn, '<')}{x}}}{{CELL_COLOR_RESET}}"
            for cn, x in zip(header_names, column_lengths)
        )

        row_format = f"| {row_format_inner} |"

        if self.supports_colors:
            c = self._color_support
            assert c is not None
            header_color = c.Style.bold
            header_color_reset = c.Style.reset
        else:
            header_color = ""
            header_color_reset = ""

        self.print_visual_formatting(divider)
        self.print(
            row_format.format(
                *header_names,
                CELL_COLOR=header_color,
                CELL_COLOR_RESET=header_color_reset,
            )
        )
        self.print_visual_formatting(divider)
        for row in rows:
            self.print(row_format.format(*row, CELL_COLOR="", CELL_COLOR_RESET=""))
        self.print_visual_formatting(divider)

    def print(self, /, string: str = "", **kwargs) -> None:
        """Print `string` to the stream; `file` is not accepted as a kwarg."""
        if "file" in kwargs:
            raise ValueError("Unsupported kwarg file")
        print(string, file=self.stream, **kwargs)

    def print_visual_formatting(self, /, format_sequence: str, **kwargs) -> None:
        """Print purely visual text; skipped when optimizing for screen readers."""
        if self.optimize_for_screen_reader:
            return
        self.print(format_sequence, **kwargs)

    def print_for_screen_reader(self, /, text: str, **kwargs) -> None:
        """Print text that is only relevant when optimizing for screen readers."""
        if not self.optimize_for_screen_reader:
            return
        self.print(text, **kwargs)

    def _check_color(self, color: Optional[str]) -> None:
        """Raise `ValueError` if `color` is set but not a supported color."""
        if color is not None and color not in _SUPPORTED_COLORS:
            # Sorted, so the message does not depend on set iteration order.
            raise ValueError(
                f"Unsupported color: {color}. Only the following are supported {','.join(sorted(_SUPPORTED_COLORS))}"
            )

    def _check_text_style(self, style: Optional[str]) -> None:
        """Raise `ValueError` if `style` is set but not a supported style."""
        if style is not None and style not in _SUPPORTED_STYLES:
            raise ValueError(
                f"Unsupported style: {style}. Only the following are supported {','.join(sorted(_SUPPORTED_STYLES))}"
            )

    def render_url(self, link_url: str) -> str:
        """Return the text to show for `link_url` (the URL itself here)."""
        return link_url
@contextlib.contextmanager
def _stream_to_pager(
    parsed_args: argparse.Namespace,
) -> Iterator[Tuple[IO[str], OutputStylingBase]]:
    """Provide an output stream (and styling helper) that may go via a pager.

    The pager is only used when `parsed_args.pager` is set, stdout is a TTY,
    the output format is "text" and a pager command could be determined.
    Otherwise `sys.stdout` is yielded directly.

    :param parsed_args: The parsed arguments; `pager` is read directly and the
      rest is passed on to `_output_styling`.
    :return: A context manager yielding the stream to write to and the
      styling helper bound to that stream.
    """
    fancy_output = _output_styling(parsed_args, sys.stdout)
    if (
        not parsed_args.pager
        or not sys.stdout.isatty()
        or fancy_output.output_format != "text"
    ):
        yield sys.stdout, fancy_output
        return

    pager = _pager()
    if pager is None:
        yield sys.stdout, fancy_output
        return

    env: Mapping[str, str] = os.environ
    if "LESS" not in env:
        # Provide default options for `less` without touching our own
        # environment.
        env_copy = dict(os.environ)
        env_copy["LESS"] = "-FRSXMQ"
        env = env_copy

    cmd = subprocess.Popen(
        pager,
        stdin=subprocess.PIPE,
        encoding="utf-8",
        env=env,
    )
    stdin = assume_not_none(cmd.stdin)
    try:
        fancy_output.stream = stdin
        yield stdin, fancy_output
    except Exception:
        stdin.close()
        cmd.kill()
        cmd.wait()
        raise
    finally:
        # Point the styling helper back at stdout (this used to be
        # `sys.stdin`, which is not an output stream) and wait for the pager
        # to finish.
        fancy_output.stream = sys.stdout
        stdin.close()
        cmd.wait()
+def _expand_rows( + rows: Sequence[Sequence[Union[str, Sequence[str]]]] +) -> Iterator[Sequence[str]]: + for row in rows: + if all(isinstance(c, str) for c in row): + yield row + else: + longest = max(len(c) if isinstance(c, list) else 1 for c in row) + cells = [_normalize_cell(c, times=longest) for c in row] + yield from zip(*cells) diff --git a/src/debputy/commands/debputy_cmd/plugin_cmds.py b/src/debputy/commands/debputy_cmd/plugin_cmds.py new file mode 100644 index 0000000..3d8bdcb --- /dev/null +++ b/src/debputy/commands/debputy_cmd/plugin_cmds.py @@ -0,0 +1,1364 @@ +import argparse +import operator +import os +import sys +from itertools import chain +from typing import ( + Sequence, + Union, + Tuple, + Iterable, + Any, + Optional, + Type, + Mapping, + Callable, +) + +from debputy import DEBPUTY_DOC_ROOT_DIR +from debputy.commands.debputy_cmd.context import ( + CommandContext, + add_arg, + ROOT_COMMAND, +) +from debputy.commands.debputy_cmd.dc_util import flatten_ppfs +from debputy.commands.debputy_cmd.output import ( + _stream_to_pager, + _output_styling, + OutputStylingBase, +) +from debputy.exceptions import DebputySubstitutionError +from debputy.filesystem_scan import build_virtual_fs +from debputy.manifest_parser.base_types import TypeMapping +from debputy.manifest_parser.declarative_parser import ( + DeclarativeMappingInputParser, + DeclarativeNonMappingInputParser, + BASIC_SIMPLE_TYPES, +) +from debputy.manifest_parser.parser_data import ParserContextData +from debputy.manifest_parser.util import unpack_type, AttributePath +from debputy.packager_provided_files import detect_all_packager_provided_files +from debputy.plugin.api.example_processing import ( + process_discard_rule_example, + DiscardVerdict, +) +from debputy.plugin.api.impl import plugin_metadata_for_debputys_own_plugin +from debputy.plugin.api.impl_types import ( + PackagerProvidedFileClassSpec, + PluginProvidedManifestVariable, + DispatchingParserBase, + DeclarativeInputParser, + 
def format_output_arg(
    default_format: str,
    allowed_formats: Sequence[str],
    help_text: str,
) -> Callable[[argparse.ArgumentParser], None]:
    """Create a configurator that adds `--output-format` to an argument parser

    :param default_format: The format used when the option is omitted. It must be
      one of `allowed_formats`.
    :param allowed_formats: The values accepted by the option.
    :param help_text: The help text shown for the option.
    :return: A callable that registers the option on the parser it is given.
    :raises ValueError: If `default_format` is not listed in `allowed_formats`.
    """
    if default_format in allowed_formats:
        option_settings = {
            "dest": "output_format",
            "default": default_format,
            "choices": allowed_formats,
            "help": help_text,
        }

        def _add_output_format_option(parser: argparse.ArgumentParser) -> None:
            parser.add_argument("--output-format", **option_settings)

        return _add_output_format_option

    raise ValueError("The default format must be in the allowed_formats...")
"always" work +TEXT_ONLY_FORMAT = format_output_arg( + "text", + ["text"], + "Select a given output format (options and output are not stable between releases)", +) + + +TEXT_CSV_FORMAT_NO_STABILITY_PROMISE = format_output_arg( + "text", + ["text", "csv"], + "Select a given output format (options and output are not stable between releases)", +) + + +@plugin_list_cmds.register_subcommand( + "plugins", + help_description="List known plugins with their versions", + argparser=TEXT_CSV_FORMAT_NO_STABILITY_PROMISE, +) +def _plugin_cmd_list_plugins(context: CommandContext) -> None: + plugin_metadata_entries = context.load_plugins().plugin_data.values() + # Because the "plugins" part is optional, we are not guaranteed tha TEXT_CSV_FORMAT applies + output_format = getattr(context.parsed_args, "output_format", "text") + assert output_format in {"text", "csv"} + with _stream_to_pager(context.parsed_args) as (fd, fo): + fo.print_list_table( + ["Plugin Name", "Plugin Path"], + [(p.plugin_name, p.plugin_path) for p in plugin_metadata_entries], + ) + + +def _path(path: str) -> str: + if path.startswith("./"): + return path[1:] + return path + + +def _ppf_flags(ppf: PackagerProvidedFileClassSpec) -> str: + flags = [] + if ppf.allow_name_segment: + flags.append("named") + if ppf.allow_architecture_segment: + flags.append("arch") + if ppf.supports_priority: + flags.append(f"priority={ppf.default_priority}") + if ppf.packageless_is_fallback_for_all_packages: + flags.append("main-all-fallback") + if ppf.post_formatting_rewrite: + flags.append("post-format-hook") + return ",".join(flags) + + +@plugin_list_cmds.register_subcommand( + ["used-packager-provided-files", "uppf", "u-p-p-f"], + help_description="List packager provided files used by this package (debian/pkg.foo)", + argparser=TEXT_ONLY_FORMAT, +) +def _plugin_cmd_list_uppf(context: CommandContext) -> None: + ppf_table = context.load_plugins().packager_provided_files + all_ppfs = detect_all_packager_provided_files( + ppf_table, 
+ context.debian_dir, + context.binary_packages(), + ) + requested_plugins = set(context.requested_plugins()) + requested_plugins.add("debputy") + all_detected_ppfs = list(flatten_ppfs(all_ppfs)) + + used_ppfs = [ + p + for p in all_detected_ppfs + if p.definition.debputy_plugin_metadata.plugin_name in requested_plugins + ] + inactive_ppfs = [ + p + for p in all_detected_ppfs + if p.definition.debputy_plugin_metadata.plugin_name not in requested_plugins + ] + + if not used_ppfs and not inactive_ppfs: + print("No packager provided files detected; not even a changelog... ?") + return + + with _stream_to_pager(context.parsed_args) as (fd, fo): + if used_ppfs: + headers: Sequence[Union[str, Tuple[str, str]]] = [ + "File", + "Matched Stem", + "Installed Into", + "Installed As", + ] + fo.print_list_table( + headers, + [ + ( + ppf.path.path, + ppf.definition.stem, + ppf.package_name, + "/".join(ppf.compute_dest()).lstrip("."), + ) + for ppf in sorted( + used_ppfs, key=operator.attrgetter("package_name") + ) + ], + ) + + if inactive_ppfs: + headers: Sequence[Union[str, Tuple[str, str]]] = [ + "UNUSED FILE", + "Matched Stem", + "Installed Into", + "Could Be Installed As", + "If B-D Had", + ] + fo.print_list_table( + headers, + [ + ( + f"~{ppf.path.path}~", + ppf.definition.stem, + f"~{ppf.package_name}~", + "/".join(ppf.compute_dest()).lstrip("."), + f"debputy-plugin-{ppf.definition.debputy_plugin_metadata.plugin_name}", + ) + for ppf in sorted( + inactive_ppfs, key=operator.attrgetter("package_name") + ) + ], + ) + + +@plugin_list_cmds.register_subcommand( + ["packager-provided-files", "ppf", "p-p-f"], + help_description="List packager provided file definitions (debian/pkg.foo)", + argparser=TEXT_CSV_FORMAT_NO_STABILITY_PROMISE, +) +def _plugin_cmd_list_ppf(context: CommandContext) -> None: + ppfs: Iterable[PackagerProvidedFileClassSpec] + ppfs = context.load_plugins().packager_provided_files.values() + with _stream_to_pager(context.parsed_args) as (fd, fo): + headers: 
Sequence[Union[str, Tuple[str, str]]] = [ + "Stem", + "Installed As", + ("Mode", ">"), + "Features", + "Provided by", + ] + fo.print_list_table( + headers, + [ + ( + ppf.stem, + _path(ppf.installed_as_format), + "0" + oct(ppf.default_mode)[2:], + _ppf_flags(ppf), + ppf.debputy_plugin_metadata.plugin_name, + ) + for ppf in sorted(ppfs, key=operator.attrgetter("stem")) + ], + ) + + if os.path.isdir("debian/") and fo.output_format == "text": + fo.print() + fo.print( + "Hint: You can use `debputy plugin list used-packager-provided-files` to have `debputy`", + ) + fo.print("list all the files in debian/ that matches these definitions.") + + +@plugin_list_cmds.register_subcommand( + ["metadata-detectors"], + help_description="List metadata detectors", + argparser=TEXT_CSV_FORMAT_NO_STABILITY_PROMISE, +) +def _plugin_cmd_list_metadata_detectors(context: CommandContext) -> None: + mds = list( + chain.from_iterable( + context.load_plugins().metadata_maintscript_detectors.values() + ) + ) + + def _sort_key(md: "MetadataOrMaintscriptDetector") -> Any: + return md.plugin_metadata.plugin_name, md.detector_id + + with _stream_to_pager(context.parsed_args) as (fd, fo): + fo.print_list_table( + ["Provided by", "Detector Id"], + [ + (md.plugin_metadata.plugin_name, md.detector_id) + for md in sorted(mds, key=_sort_key) + ], + ) + + +def _resolve_variable_for_list( + substitution: Substitution, + variable: PluginProvidedManifestVariable, +) -> str: + var = "{{" + variable.variable_name + "}}" + try: + value = substitution.substitute(var, "CLI request") + except DebputySubstitutionError: + value = None + return _render_manifest_variable_value(value) + + +def _render_manifest_variable_flag(variable: PluginProvidedManifestVariable) -> str: + flags = [] + if variable.is_for_special_case: + flags.append("special-use-case") + if variable.is_internal: + flags.append("internal") + return ",".join(flags) + + +def _render_list_filter(v: Optional[bool]) -> str: + if v is None: + return "N/A" + 
@plugin_list_cmds.register_subcommand(
    ["manifest-variables"],
    help_description="List plugin provided manifest variables (such as `{{path:FOO}}`)",
)
def plugin_cmd_list_manifest_variables(context: CommandContext) -> None:
    """List the manifest variables provided by plugins, followed by a filter summary

    Internal, special-use and token variables are hidden unless the matching
    `--show-*` option (or `--show-all-variables`) was given. The second table
    reports which of those filters are in effect.

    :param context: The command context providing the plugins, the substitution
      and the parsed arguments.
    """
    known_variables = context.load_plugins().manifest_variables
    substitution = context.substitution.with_extra_substitutions(
        PACKAGE="<package-name>"
    )
    args = context.parsed_args
    include_special = args.show_special_use_variables
    include_tokens = args.show_token_variables
    include_everything = args.show_all_variables

    def _is_listed(candidate: PluginProvidedManifestVariable) -> bool:
        if include_everything:
            return True
        filtered_out = (
            candidate.is_internal
            or (candidate.is_for_special_case and not include_special)
            or (candidate.is_token and not include_tokens)
        )
        return not filtered_out

    with _stream_to_pager(context.parsed_args) as (_, fo):
        variable_rows = []
        for name, variable in sorted(known_variables.items()):
            if not _is_listed(variable):
                continue
            variable_rows.append(
                (
                    name,
                    _resolve_variable_for_list(substitution, variable),
                    _render_manifest_variable_flag(variable),
                    variable.plugin_metadata.plugin_name,
                )
            )
        fo.print_list_table(
            ["Variable (use via: `{{ NAME }}`)", "Value", "Flag", "Provided by"],
            variable_rows,
        )

        fo.print()

        filter_options = (
            ("Token variables", include_tokens, "--show-token-variables"),
            (
                "Special use variables",
                include_special,
                "--show-special-case-variables",
            ),
        )
        filter_rows = []
        for label, enabled, option in filter_options:
            # The individual setting is irrelevant when everything is shown anyway.
            state = None if include_everything else enabled
            filter_rows.append(
                (
                    label,
                    _render_list_filter(state or include_everything),
                    f"{option} OR --show-all-variables",
                )
            )

        fo.print_list_table(
            ["Variable type", "Value", "Option"],
            filter_rows,
        )
+ "--show-special-case-variables", + dest="show_special_use_variables", + default=False, + action="store_true", + help="Show variables that are only used in special / niche cases", + ) + plugin_list_manifest_variables_parser.add_argument( + "--show-token-variables", + dest="show_token_variables", + default=False, + action="store_true", + help="Show token (syntactical) variables like {{token:TAB}}", + ) + plugin_list_manifest_variables_parser.add_argument( + "--show-all-variables", + dest="show_all_variables", + default=False, + action="store_true", + help="Show all variables regardless of type/kind (overrules other filter settings)", + ) + TEXT_ONLY_FORMAT(plugin_list_manifest_variables_parser) + + +def _parser_type_name(v: Union[str, Type[Any]]) -> str: + if isinstance(v, str): + return v if v != "<ROOT>" else "" + return v.__name__ + + +@plugin_list_cmds.register_subcommand( + ["plugable-manifest-rules", "p-m-r", "pmr"], + help_description="Plugable manifest rules (such as install rules)", + argparser=TEXT_CSV_FORMAT_NO_STABILITY_PROMISE, +) +def _plugin_cmd_list_manifest_rules(context: CommandContext) -> None: + feature_set = context.load_plugins() + + # Type hint to make the chain call easier for the type checker, which does not seem + # to derive to this common base type on its own. 
+ base_type = Iterable[Tuple[Union[str, Type[Any]], DispatchingParserBase[Any]]] + + table_parsers: base_type = feature_set.dispatchable_table_parsers.items() + object_parsers: base_type = feature_set.dispatchable_object_parsers.items() + + parsers = chain( + table_parsers, + object_parsers, + ) + + with _stream_to_pager(context.parsed_args) as (fd, fo): + fo.print_list_table( + ["Rule Name", "Rule Type", "Provided By"], + [ + ( + rn, + _parser_type_name(rt), + pt.parser_for(rn).plugin_metadata.plugin_name, + ) + for rt, pt in parsers + for rn in pt.registered_keywords() + ], + ) + + +@plugin_list_cmds.register_subcommand( + ["automatic-discard-rules", "a-d-r"], + help_description="List automatic discard rules", + argparser=TEXT_CSV_FORMAT_NO_STABILITY_PROMISE, +) +def _plugin_cmd_list_automatic_discard_rules(context: CommandContext) -> None: + auto_discard_rules = context.load_plugins().auto_discard_rules + + with _stream_to_pager(context.parsed_args) as (fd, fo): + fo.print_list_table( + ["Name", "Provided By"], + [ + ( + name, + ppdr.plugin_metadata.plugin_name, + ) + for name, ppdr in auto_discard_rules.items() + ], + ) + + +def _provide_placeholder_parser_doc( + parser_doc: Optional[ParserDocumentation], + attributes: Iterable[str], +) -> ParserDocumentation: + if parser_doc is None: + parser_doc = reference_documentation() + changes = {} + if parser_doc.attribute_doc is None: + changes["attribute_doc"] = [undocumented_attr(attr) for attr in attributes] + + if changes: + return parser_doc.replace(**changes) + return parser_doc + + +def _doc_args_parser_doc( + rule_name: str, + declarative_parser: DeclarativeInputParser[Any], + plugin_metadata: DebputyPluginMetadata, +) -> Tuple[Mapping[str, str], ParserDocumentation]: + attributes: Iterable[str] + if isinstance(declarative_parser, DeclarativeMappingInputParser): + attributes = declarative_parser.source_attributes.keys() + else: + attributes = [] + doc_args = { + "RULE_NAME": rule_name, + "MANIFEST_FORMAT_DOC": 
f"{DEBPUTY_DOC_ROOT_DIR}/MANIFEST-FORMAT.md", + "PLUGIN_NAME": plugin_metadata.plugin_name, + } + parser_doc = _provide_placeholder_parser_doc( + declarative_parser.inline_reference_documentation, + attributes, + ) + return doc_args, parser_doc + + +def _render_rule( + rule_name: str, + rule_type: str, + declarative_parser: DeclarativeInputParser[Any], + plugin_metadata: DebputyPluginMetadata, + manifest_attribute_path: str, +) -> None: + is_root_rule = rule_name == "::" + + doc_args, parser_doc = _doc_args_parser_doc( + "the manifest root" if is_root_rule else rule_name, + declarative_parser, + plugin_metadata, + ) + t = assume_not_none(parser_doc.title).format(**doc_args) + print(t) + print("=" * len(t)) + print() + + print(assume_not_none(parser_doc.description).format(**doc_args).rstrip()) + + print() + alt_form_parser = getattr(declarative_parser, "alt_form_parser", None) + if isinstance( + declarative_parser, (DeclarativeMappingInputParser, DispatchingObjectParser) + ): + if isinstance(declarative_parser, DeclarativeMappingInputParser): + attributes = declarative_parser.source_attributes + required = declarative_parser.input_time_required_parameters + conditionally_required = declarative_parser.at_least_one_of + mutually_exclusive = declarative_parser.mutually_exclusive_attributes + else: + attributes = {} + required = frozenset() + conditionally_required = frozenset() + mutually_exclusive = frozenset() + print("Attributes:") + attribute_docs = ( + parser_doc.attribute_doc if parser_doc.attribute_doc is not None else [] + ) + for attr_doc in assume_not_none(attribute_docs): + attr_description = attr_doc.description + prefix = " - " + + for parameter in sorted(attr_doc.attributes): + parameter_details = attributes.get(parameter) + if parameter_details is not None: + source_name = parameter_details.source_attribute_name + describe_type = parameter_details.type_validator.describe_type() + else: + assert isinstance(declarative_parser, DispatchingObjectParser) + 
source_name = parameter + subparser = declarative_parser.parser_for(source_name).parser + if isinstance(subparser, DispatchingObjectParser): + rule_prefix = rule_name if rule_name != "::" else "" + describe_type = f"Object (see `{rule_prefix}::{subparser.manifest_attribute_path_template}`)" + elif isinstance(subparser, DeclarativeMappingInputParser): + describe_type = "<Type definition not implemented yet>" # TODO: Derive from subparser + elif isinstance(subparser, DeclarativeNonMappingInputParser): + describe_type = ( + subparser.alt_form_parser.type_validator.describe_type() + ) + else: + describe_type = f"<Unknown: Non-introspectable subparser - {subparser.__class__.__name__}>" + + if source_name in required: + req_str = "required" + elif any(source_name in s for s in conditionally_required): + req_str = "conditional" + else: + req_str = "optional" + print(f"{prefix}`{source_name}` ({req_str}): {describe_type}") + prefix = " " + + if attr_description: + print() + for line in attr_description.format(**doc_args).splitlines( + keepends=False + ): + print(f" {line}") + print() + + if ( + bool(conditionally_required) + or bool(mutually_exclusive) + or any(pd.conflicting_attributes for pd in attributes.values()) + ): + print() + print("This rule enforces the following restrictions:") + + if conditionally_required: + for cr in conditionally_required: + anames = "`, `".join( + attributes[a].source_attribute_name for a in cr + ) + if cr in mutually_exclusive: + print(f" - The rule must use exactly one of: `{anames}`") + else: + print(f" - The rule must use at least one of: `{anames}`") + + if mutually_exclusive or any( + pd.conflicting_attributes for pd in attributes.values() + ): + for parameter, parameter_details in sorted(attributes.items()): + source_name = parameter_details.source_attribute_name + conflicts = set(parameter_details.conflicting_attributes) + for mx in mutually_exclusive: + if parameter in mx and mx not in conditionally_required: + conflicts |= mx + if 
conflicts: + conflicts.discard(parameter) + cnames = "`, `".join( + attributes[a].source_attribute_name for a in conflicts + ) + print( + f" - The attribute `{source_name}` cannot be used with any of: `{cnames}`" + ) + print() + if alt_form_parser is not None: + # FIXME: Mapping[str, Any] ends here, which is ironic given the headline. + print(f"Non-mapping format: {alt_form_parser.type_validator.describe_type()}") + alt_parser_desc = parser_doc.alt_parser_description + if alt_parser_desc: + for line in alt_parser_desc.format(**doc_args).splitlines(keepends=False): + print(f" {line}") + print() + + if declarative_parser.reference_documentation_url is not None: + print( + f"Reference documentation: {declarative_parser.reference_documentation_url}" + ) + else: + print( + "Reference documentation: No reference documentation link provided by the plugin" + ) + + if not is_root_rule: + print( + f"Used in: {manifest_attribute_path if manifest_attribute_path != '<ROOT>' else 'The manifest root'}" + ) + print(f"Rule reference: {rule_type}::{rule_name}") + print(f"Plugin: {plugin_metadata.plugin_name}") + else: + print(f"Rule reference: {rule_name}") + + print() + print( + "PS: If you want to know more about a non-trivial type of an attribute such as `FileSystemMatchRule`," + ) + print( + "you can use `debputy plugin show type-mapping FileSystemMatchRule` to look it up " + ) + + +def _render_manifest_variable_value(v: Optional[str]) -> str: + if v is None: + return "(N/A: Cannot resolve the variable)" + v = v.replace("\n", "\\n").replace("\t", "\\t") + return v + + +def _render_multiline_documentation( + documentation: str, + *, + first_line_prefix: str = "Documentation: ", + following_line_prefix: str = " ", +) -> None: + current_prefix = first_line_prefix + for line in documentation.splitlines(keepends=False): + if line.isspace(): + if not current_prefix.isspace(): + print(current_prefix.rstrip()) + current_prefix = following_line_prefix + else: + print() + continue + 
print(f"{current_prefix}{line}") + current_prefix = following_line_prefix + + +@plugin_show_cmds.register_subcommand( + ["manifest-variables"], + help_description="Plugin provided manifest variables (such as `{{path:FOO}}`)", + argparser=add_arg( + "manifest_variable", + metavar="manifest-variable", + help="Name of the variable (such as `path:FOO` or `{{path:FOO}}`) to display details about", + ), +) +def _plugin_cmd_show_manifest_variables(context: CommandContext) -> None: + plugin_feature_set = context.load_plugins() + variables = plugin_feature_set.manifest_variables + substitution = context.substitution + parsed_args = context.parsed_args + variable_name = parsed_args.manifest_variable + fo = _output_styling(context.parsed_args, sys.stdout) + if variable_name.startswith("{{") and variable_name.endswith("}}"): + variable_name = variable_name[2:-2] + variable: Optional[PluginProvidedManifestVariable] + if variable_name.startswith("env:") and len(variable_name) > 4: + env_var = variable_name[4:] + variable = PluginProvidedManifestVariable( + plugin_feature_set.plugin_data["debputy"], + variable_name, + variable_value=None, + is_context_specific_variable=False, + is_documentation_placeholder=True, + variable_reference_documentation=f'Environment variable "{env_var}"', + ) + else: + variable = variables.get(variable_name) + if variable is None: + _error( + f'Cannot resolve "{variable_name}" as a known variable from any of the available' + f" plugins. Please use `debputy plugin list manifest-variables` to list all known" + f" provided variables." 
+ ) + + var_with_braces = "{{" + variable_name + "}}" + try: + source_value = substitution.substitute(var_with_braces, "CLI request") + except DebputySubstitutionError: + source_value = None + binary_value = source_value + print(f"Variable: {variable_name}") + fo.print_visual_formatting(f"=========={'=' * len(variable_name)}") + print() + + if variable.is_context_specific_variable: + try: + binary_value = substitution.with_extra_substitutions( + PACKAGE="<package-name>", + ).substitute(var_with_braces, "CLI request") + except DebputySubstitutionError: + binary_value = None + + doc = variable.variable_reference_documentation or "No documentation provided" + _render_multiline_documentation(doc) + + if source_value == binary_value: + print(f"Resolved: {_render_manifest_variable_value(source_value)}") + else: + print("Resolved:") + print(f" [source context]: {_render_manifest_variable_value(source_value)}") + print(f" [binary context]: {_render_manifest_variable_value(binary_value)}") + + if variable.is_for_special_case: + print( + 'Special-case: The variable has been marked as a "special-case"-only variable.' + ) + + if not variable.is_documentation_placeholder: + print(f"Plugin: {variable.plugin_metadata.plugin_name}") + + if variable.is_internal: + print() + # I knew everything I felt was showing on my face, and I hate that. 
I grated out, + print("That was private.") + + +def _determine_ppf( + context: CommandContext, +) -> Tuple[PackagerProvidedFileClassSpec, bool]: + feature_set = context.load_plugins() + ppf_name = context.parsed_args.ppf_name + try: + return feature_set.packager_provided_files[ppf_name], False + except KeyError: + pass + + orig_ppf_name = ppf_name + if ( + ppf_name.startswith("d/") + and not os.path.lexists(ppf_name) + and os.path.lexists("debian/" + ppf_name[2:]) + ): + ppf_name = "debian/" + ppf_name[2:] + + if ppf_name in ("debian/control", "debian/debputy.manifest", "debian/rules"): + if ppf_name == "debian/debputy.manifest": + doc = f"{DEBPUTY_DOC_ROOT_DIR}/MANIFEST-FORMAT.md" + else: + doc = "Debian Policy Manual or a packaging tutorial" + _error( + f"Sorry. While {orig_ppf_name} is a well-defined packaging file, it does not match the definition of" + f" a packager provided file. Please see {doc} for more information about this file" + ) + + if context.has_dctrl_file and os.path.lexists(ppf_name): + basename = ppf_name[7:] + if "/" not in basename: + debian_dir = build_virtual_fs([basename]) + all_ppfs = detect_all_packager_provided_files( + feature_set.packager_provided_files, + debian_dir, + context.binary_packages(), + ) + if all_ppfs: + matched = next(iter(all_ppfs.values())) + if len(matched.auto_installable) == 1 and not matched.reserved_only: + return matched.auto_installable[0].definition, True + if not matched.auto_installable and len(matched.reserved_only) == 1: + reserved = next(iter(matched.reserved_only.values())) + if len(reserved) == 1: + return reserved[0].definition, True + + _error( + f'Unknown packager provided file "{orig_ppf_name}". Please use' + f" `debputy plugin list packager-provided-files` to see them all." 
+ ) + + +@plugin_show_cmds.register_subcommand( + ["packager-provided-files", "ppf", "p-p-f"], + help_description="Show details about a given packager provided file (debian/pkg.foo)", + argparser=add_arg( + "ppf_name", + metavar="name", + help="Name of the packager provided file (such as `changelog`) to display details about", + ), +) +def _plugin_cmd_show_ppf(context: CommandContext) -> None: + ppf, matched_file = _determine_ppf(context) + + fo = _output_styling(context.parsed_args, sys.stdout) + + fo.print(f"Packager Provided File: {ppf.stem}") + fo.print_visual_formatting(f"========================{'=' * len(ppf.stem)}") + fo.print() + ref_doc = ppf.reference_documentation + description = ref_doc.description if ref_doc else None + doc_uris = ref_doc.format_documentation_uris if ref_doc else tuple() + if description is None: + fo.print( + f"Sorry, no description provided by the plugin {ppf.debputy_plugin_metadata.plugin_name}." + ) + else: + for line in description.splitlines(keepends=False): + fo.print(line) + + fo.print() + fo.print("Features:") + if ppf.packageless_is_fallback_for_all_packages: + fo.print(f" * debian/{ppf.stem} is used for *ALL* packages") + else: + fo.print(f' * debian/{ppf.stem} is used for only for the "main" package') + if ppf.allow_name_segment: + fo.print(" * Supports naming segment (multiple files and custom naming).") + else: + fo.print( + " * No naming support; at most one per package and it is named after the package." + ) + if ppf.allow_architecture_segment: + fo.print(" * Supports architecture specific variants.") + else: + fo.print(" * No architecture specific variants.") + if ppf.supports_priority: + fo.print( + f" * Has a priority system (default priority: {ppf.default_priority})." 
@plugin_show_cmds.register_subcommand(
    ["plugable-manifest-rules", "p-m-r", "pmr"],
    help_description="Plugable manifest rules (such as install rules)",
    argparser=add_arg(
        "pmr_rule_name",
        metavar="rule-name",
        help="Name of the rule (such as `install`) to display details about",
    ),
)
def _plugin_cmd_show_manifest_rule(context: CommandContext) -> None:
    """Show the documentation for a single plugable manifest rule

    The rule can be given as `<rule-name>` or as `<rule-type>::<rule-name>`; the
    latter form restricts the lookup to the matching rule type(s). The special
    name `::` renders the manifest root when no parser knows it as a keyword.

    :param context: The command context providing plugins and parsed arguments.
    """
    feature_set = context.load_plugins()
    parsed_args = context.parsed_args
    requested_type = None
    rule_name = parsed_args.pmr_rule_name
    if rule_name != "::" and "::" in rule_name:
        requested_type, _, rule_name = rule_name.partition("::")

    base_type = Iterable[Tuple[Union[str, Type[Any]], DispatchingParserBase[Any]]]
    table_parsers: base_type = feature_set.dispatchable_table_parsers.items()
    object_parsers: base_type = feature_set.dispatchable_object_parsers.items()

    candidates = [
        (rule_type, dispatcher)
        for rule_type, dispatcher in chain(table_parsers, object_parsers)
        if (requested_type is None or requested_type in _parser_type_name(rule_type))
        and dispatcher.is_known_keyword(rule_name)
    ]

    if not candidates and rule_name != "::":
        _error(
            f"Could not find any plugable manifest rule related to {parsed_args.pmr_rule_name}."
            f" Please use `debputy plugin list plugable-manifest-rules` to see the list of rules."
        )
    if len(candidates) > 1:
        first_type = candidates[0][0]
        second_type = candidates[1][0]
        _error(
            f"The name {rule_name} was ambiguous and matched multiple rule types. Please use"
            f" <rule-type>::{rule_name} to clarify which rule to use"
            f" (such as {_parser_type_name(first_type)}::{rule_name} or {_parser_type_name(second_type)}::{rule_name})."
            f" Please use `debputy plugin list plugable-manifest-rules` to see the list of rules."
        )

    if not candidates:
        # Only reachable for "::" without a match: render the manifest root itself.
        rule_name = "::"
        parser = feature_set.dispatchable_object_parsers[OPARSER_MANIFEST_ROOT]
        parser_type_name = ""
        plugin_metadata = plugin_metadata_for_debputys_own_plugin()
        manifest_attribute_path = ""
    else:
        rule_type, dispatcher = candidates[0]
        provided_parser = dispatcher.parser_for(rule_name)
        manifest_attribute_path = (
            rule_type
            if isinstance(rule_type, str)
            else SUPPORTED_DISPATCHABLE_TABLE_PARSERS[rule_type]
        )
        parser_type_name = _parser_type_name(rule_type)
        parser = provided_parser.parser
        plugin_metadata = provided_parser.plugin_metadata

    _render_rule(
        rule_name,
        parser_type_name,
        parser,
        plugin_metadata,
        manifest_attribute_path,
    )
+ f" Please consider filing a bug against the plugin {plugin_name}" + ) + + doc = example.description + if doc: + print(doc) + + print("Consider the following source paths matched by a glob or directory match:") + print() + if fo.optimize_for_screen_reader: + for p, _ in processed.rendered_paths: + path_name = p.absolute + print( + f"The path {path_name} is a {'directory' if p.is_dir else 'file or symlink.'}" + ) + + print() + if any(v.is_consistent and v.is_discarded for _, v in processed.rendered_paths): + print("The following paths will be discarded by this rule:") + for p, verdict in processed.rendered_paths: + path_name = p.absolute + if verdict.is_consistent and verdict.is_discarded: + print() + if p.is_dir: + print(f"{path_name} along with anything beneath it") + else: + print(path_name) + else: + print("No paths will be discarded in this example.") + + print() + if any(v.is_consistent and v.is_kept for _, v in processed.rendered_paths): + print("The following paths will be not be discarded by this rule:") + for p, verdict in processed.rendered_paths: + path_name = p.absolute + if verdict.is_consistent and verdict.is_kept: + print() + print(path_name) + + if any(not v.is_consistent for _, v in processed.rendered_paths): + print() + print( + "The example was inconsistent with the code. These are the paths where the code disagrees with" + " the provided example:" + ) + for p, verdict in processed.rendered_paths: + path_name = p.absolute + if not verdict.is_consistent: + print() + if verdict == DiscardVerdict.DISCARDED_BY_CODE: + print( + f"The path {path_name} was discarded by the code, but the example said it should" + f" have been installed." + ) + else: + print( + f"The path {path_name} was not discarded by the code, but the example said it should" + f" have been discarded." 
+ ) + return + + # Add +1 for dirs because we want trailing slashes in the output + max_len = max( + (len(p.absolute) + (1 if p.is_dir else 0)) for p, _ in processed.rendered_paths + ) + for p, verdict in processed.rendered_paths: + path_name = p.absolute + if p.is_dir: + path_name += "/" + + if not verdict.is_consistent: + print(f" {path_name:<{max_len}} !! {verdict.message}") + elif verdict.is_discarded: + print(f" {path_name:<{max_len}} << {verdict.message}") + else: + print(f" {path_name:<{max_len}}") + + +def _render_discard_rule( + context: CommandContext, + discard_rule: PluginProvidedDiscardRule, +) -> None: + fo = _output_styling(context.parsed_args, sys.stdout) + print(fo.colored(f"Automatic Discard Rule: {discard_rule.name}", style="bold")) + fo.print_visual_formatting( + f"========================{'=' * len(discard_rule.name)}" + ) + print() + doc = discard_rule.reference_documentation or "No documentation provided" + _render_multiline_documentation(doc, first_line_prefix="", following_line_prefix="") + + if len(discard_rule.examples) > 1: + print() + fo.print_visual_formatting("Examples") + fo.print_visual_formatting("--------") + print() + for no, example in enumerate(discard_rule.examples, start=1): + print( + fo.colored( + f"Example {no} of {len(discard_rule.examples)}", style="bold" + ) + ) + fo.print_visual_formatting(f"........{'.' 
@plugin_show_cmds.register_subcommand(
    ["automatic-discard-rules", "a-d-r"],
    # Describe this command rather than the manifest-rule command it was modelled on.
    help_description="Automatic discard rules (such as `backup-files`)",
    argparser=add_arg(
        "discard_rule",
        metavar="automatic-discard-rule",
        help="Name of the automatic discard rule (such as `backup-files`)",
    ),
)
def _plugin_cmd_show_automatic_discard_rules(context: CommandContext) -> None:
    """Show the documentation and examples of a single automatic discard rule.

    The rule name is read from `context.parsed_args.discard_rule` and looked up
    among the automatic discard rules of the loaded plugins. An unknown name is
    reported via `_error`.
    """
    auto_discard_rules = context.load_plugins().auto_discard_rules
    name = context.parsed_args.discard_rule
    discard_rule = auto_discard_rules.get(name)
    if discard_rule is None:
        _error(
            f'No automatic discard rule with the name "{name}". Please use'
            f" `debputy plugin list automatic-discard-rules` to see the list of automatic discard rules"
        )

    _render_discard_rule(context, discard_rule)
@plugin_show_cmds.register_subcommand(
    "type-mappings",
    help_description="Registered type mappings/descriptions",
    argparser=add_arg(
        "type_mapping",
        metavar="type-mapping",
        help="Name of the type",
    ),
)
def _plugin_cmd_show_type_mappings(context: CommandContext) -> None:
    """Show the details of the registered type mapping named on the command line.

    Mapped types are matched on their `__name__`. When nothing matches, a short
    explanation is printed for names that are YAML scalars, `Any`, lists or
    mappings; any other unknown name (and an ambiguous name) is reported via
    `_error`.
    """
    type_mapping_name = context.parsed_args.type_mapping
    type_mappings = context.load_plugins().mapped_types

    matches = [t for t in type_mappings if t.__name__ == type_mapping_name]

    if not matches:
        # Accept both the rendered name and the Python name of the basic types.
        simple_types = set(BASIC_SIMPLE_TYPES.values())
        simple_types.update(t.__name__ for t in BASIC_SIMPLE_TYPES)

        if type_mapping_name in simple_types:
            print(f"The type {type_mapping_name} is a YAML scalar.")
            return
        if type_mapping_name == "Any":
            print(
                "The Any type is a placeholder for when no typing information is provided. Often this implies"
                " custom parse logic."
            )
            return

        if type_mapping_name in ("List", "list"):
            print(
                f"The {type_mapping_name} is a YAML Sequence. Please see the YAML documentation for examples."
            )
            return

        if type_mapping_name in ("Mapping", "dict"):
            print(
                f"The {type_mapping_name} is a YAML mapping. Please see the YAML documentation for examples."
            )
            return

        if "[" in type_mapping_name:
            _error(
                f"No known matches for {type_mapping_name}. Note: It looks like a composite type. Try searching"
                " for its component parts. As an example, replace List[FileSystemMatchRule] with FileSystemMatchRule."
            )

        _error(f"Sorry, no known matches for {type_mapping_name}")

    if len(matches) > 1:
        _error(
            f"Too many matches for {type_mapping_name}... Sorry, there is no way to avoid this right now :'("
        )

    match = matches[0]
    _render_type(context, type_mappings[match])
had_issues = False + + fo.print() + fo.print(f"Provided by plugin: {pptm.plugin_metadata.plugin_name}") + + if had_issues: + fo.print() + fo.print( + fo.colored( + "Examples had issues. Please file a bug against the plugin", fg="red" + ) + ) + fo.print() + fo.print("Use --debug to see the stacktrace") + + +def _render_value(v: Any) -> str: + if isinstance(v, str) and '"' not in v: + return f'"{v}"' + return str(v) + + +def ensure_plugin_commands_are_loaded(): + # Loading the module does the heavy lifting + # However, having this function means that we do not have an "unused" import that some tool + # gets tempted to remove + assert ROOT_COMMAND.has_command("plugin") diff --git a/src/debputy/deb_packaging_support.py b/src/debputy/deb_packaging_support.py new file mode 100644 index 0000000..4cb4e8f --- /dev/null +++ b/src/debputy/deb_packaging_support.py @@ -0,0 +1,1489 @@ +import collections +import contextlib +import dataclasses +import datetime +import functools +import hashlib +import itertools +import operator +import os +import re +import subprocess +import tempfile +import textwrap +from contextlib import ExitStack +from tempfile import mkstemp +from typing import ( + Iterable, + List, + Optional, + Set, + Dict, + Sequence, + Tuple, + Iterator, + Literal, + TypeVar, + FrozenSet, + cast, +) + +import debian.deb822 +from debian.changelog import Changelog +from debian.deb822 import Deb822 + +from debputy._deb_options_profiles import DebBuildOptionsAndProfiles +from debputy.architecture_support import DpkgArchitectureBuildProcessValuesTable +from debputy.debhelper_emulation import ( + dhe_install_pkg_file_as_ctrl_file_if_present, + dhe_dbgsym_root_dir, +) +from debputy.elf_util import find_all_elf_files, ELF_MAGIC +from debputy.exceptions import DebputyDpkgGensymbolsError +from debputy.filesystem_scan import FSPath, FSROOverlay +from debputy.highlevel_manifest import ( + HighLevelManifest, + PackageTransformationDefinition, + BinaryPackageData, +) +from 
def generate_md5sums_file(control_output_dir: str, fs_root: VirtualPath) -> None:
    """Write the `md5sums` control file for the files beneath `fs_root`.

    Paths listed in an existing `conffiles` file in `control_output_dir` are
    left out. If no entry is written at all, the `md5sums` file is removed again.

    :param control_output_dir: Directory holding the control files; `conffiles`
      is read from it and `md5sums` is written to it.
    :param fs_root: Root of the file system tree to checksum.
    """
    conffiles = os.path.join(control_output_dir, "conffiles")
    md5sums = os.path.join(control_output_dir, "md5sums")
    exclude = set()
    if os.path.isfile(conffiles):
        with open(conffiles, "rt", encoding="utf-8") as fd:
            # Only lines starting with "/" are considered; they are turned into the
            # "./..."-style paths used by `fs_root`.
            exclude.update(
                "." + line.rstrip("\n") for line in fd if line.startswith("/")
            )
    files = sorted(
        (
            path
            for path in fs_root.all_paths()
            if path.is_file and path.path not in exclude
        ),
        # Sort in the same order as dh_md5sums, which is not quite the same as dpkg/`all_paths()`
        # Compare `.../doc/...` vs `.../doc-base/...` if you want to see the difference between
        # the two approaches.
        key=lambda p: p.path,
    )
    wrote_entry = False
    with open(md5sums, "wt", encoding="utf-8") as md5fd:
        for member in files:
            path = member.path
            assert path.startswith("./")
            path = path[2:]
            with member.open(byte_io=True) as f:
                file_hash = hashlib.md5()
                while chunk := f.read(8192):
                    file_hash.update(chunk)
            # Two characters separate checksum and path, as in md5sum(1) text-mode output.
            md5fd.write(f"{file_hash.hexdigest()}  {path}\n")
            # Track written entries rather than bytes read, so a package consisting solely
            # of empty files keeps its (non-empty) md5sums file.
            wrote_entry = True
    if not wrote_entry:
        os.unlink(md5sums)
+ for perl_inc_dir in known_perl_inc_dirs: + p = fs_root.lookup(perl_inc_dir) + if p and p.is_dir: + p.prune_if_empty_dir() + + # FIXME: 80% of this belongs in a metadata detector, but that requires us to expose .walk() in the public API, + # which will not be today. + for d, pm_mode in [ + (known_perl_inc_dirs.vendorlib, PERL_DEP_INDEP_PM_MODULE), + (known_perl_inc_dirs.vendorarch, PERL_DEP_ARCH_PM_MODULE), + ]: + inc_dir = fs_root.lookup(d) + if not inc_dir: + continue + for path in inc_dir.all_paths(): + if not path.is_file: + continue + if path.name.endswith(".so"): + detected_dep_requirements |= PERL_DEP_XS_MODULE + elif path.name.endswith(".pm"): + detected_dep_requirements |= pm_mode + + for path, children in fs_root.walk(): + if path.path == "./usr/share/doc": + children.clear() + continue + if ( + not path.is_file + or not path.has_fs_path + or not (path.is_executable or path.name.endswith(".pl")) + ): + continue + + interpreter = path.interpreter() + if interpreter is not None and interpreter.command_full_basename == "perl": + detected_dep_requirements |= PERL_DEP_PROGRAM + + if not detected_dep_requirements: + return + dpackage = "perl" + # FIXME: Currently, dh_perl supports perl-base via manual toggle. 
+ + dependency = dpackage + if not (detected_dep_requirements & PERL_DEP_MA_ANY_INCOMPATIBLE_TYPES): + dependency += ":any" + + if detected_dep_requirements & PERL_DEP_XS_MODULE: + dpkg_version = _dpkg_perl_version(dpackage) + dependency += f" (>= {dpkg_version})" + substvars.add_dependency("perl:Depends", dependency) + + if detected_dep_requirements & (PERL_DEP_XS_MODULE | PERL_DEP_ARCH_PM_MODULE): + substvars.add_dependency("perl:Depends", perlxs_api_dependency()) + + +def usr_local_transformation(dctrl: BinaryPackage, fs_root: VirtualPath) -> None: + path = fs_root.lookup("./usr/local") + if path and any(path.iterdir): + # There are two key issues: + # 1) Getting the generated maintscript carried on to the final maintscript + # 2) Making sure that manifest created directories do not trigger the "unused error". + _error( + f"Replacement of /usr/local paths is currently not supported in debputy (triggered by: {dctrl.name})." + ) + + +def _find_and_analyze_systemd_service_files( + fs_root: VirtualPath, + systemd_service_dir: Literal["system", "user"], +) -> Iterable[VirtualPath]: + service_dirs = [ + f"./usr/lib/systemd/{systemd_service_dir}", + f"./lib/systemd/{systemd_service_dir}", + ] + aliases: Dict[str, List[str]] = collections.defaultdict(list) + seen = set() + all_files = [] + + for d in service_dirs: + system_dir = fs_root.lookup(d) + if not system_dir: + continue + for child in system_dir.iterdir: + if child.is_symlink: + dest = os.path.basename(child.readlink()) + aliases[dest].append(child.name) + elif child.is_file and child.name not in seen: + seen.add(child.name) + all_files.append(child) + + return all_files + + +def detect_systemd_user_service_files( + dctrl: BinaryPackage, + fs_root: VirtualPath, +) -> None: + for service_file in _find_and_analyze_systemd_service_files(fs_root, "user"): + _error( + f'Sorry, systemd user services files are not supported at the moment (saw "{service_file.path}"' + f" in {dctrl.name})" + ) + + +# Generally, this 
should match the release date of oldstable or oldoldstable +_DCH_PRUNE_CUT_OFF_DATE = datetime.date(2019, 7, 6) +_DCH_MIN_NUM_OF_ENTRIES = 4 + + +def _prune_dch_file( + package: BinaryPackage, + path: VirtualPath, + is_changelog: bool, + keep_versions: Optional[Set[str]], + *, + trim: bool = True, +) -> Tuple[bool, Optional[Set[str]]]: + # TODO: Process `d/changelog` once + # Note we cannot assume that changelog_file is always `d/changelog` as you can have + # per-package changelogs. + with path.open() as fd: + dch = Changelog(fd) + shortened = False + important_entries = 0 + binnmu_entries = [] + if is_changelog: + kept_entries = [] + for block in dch: + if block.other_pairs.get("binary-only", "no") == "yes": + # Always keep binNMU entries (they are always in the top) and they do not count + # towards our kept_entries limit + binnmu_entries.append(block) + continue + block_date = block.date + if block_date is None: + _error(f"The Debian changelog was missing date in sign off line") + entry_date = datetime.datetime.strptime( + block_date, "%a, %d %b %Y %H:%M:%S %z" + ).date() + if ( + trim + and entry_date < _DCH_PRUNE_CUT_OFF_DATE + and important_entries >= _DCH_MIN_NUM_OF_ENTRIES + ): + shortened = True + break + # Match debhelper in incrementing after the check. + important_entries += 1 + kept_entries.append(block) + else: + assert keep_versions is not None + # The NEWS files should match the version for the dch to avoid lintian warnings. 
+ # If that means we remove all entries in the NEWS file, then we delete the NEWS + # file (see #1021607) + kept_entries = [b for b in dch if b.version in keep_versions] + shortened = len(dch) > len(kept_entries) + if shortened and not kept_entries: + path.unlink() + return True, None + + if not shortened and not binnmu_entries: + return False, None + + parent_dir = assume_not_none(path.parent_dir) + + with path.replace_fs_path_content() as fs_path, open( + fs_path, "wt", encoding="utf-8" + ) as fd: + for entry in kept_entries: + fd.write(str(entry)) + + if is_changelog and shortened: + # For changelog (rather than NEWS) files, add a note about how to + # get the full version. + msg = textwrap.dedent( + f"""\ + # Older entries have been removed from this changelog. + # To read the complete changelog use `apt changelog {package.name}`. + """ + ) + fd.write(msg) + + if binnmu_entries: + if package.is_arch_all: + _error( + f"The package {package.name} is architecture all, but it is built during a binNMU. 
def fixup_debian_changelog_and_news_file(
    dctrl: BinaryPackage,
    fs_root: VirtualPath,
    is_native: bool,
    build_env: DebBuildOptionsAndProfiles,
) -> None:
    """Rename and prune the Debian changelog and prune the NEWS file to match.

    Nothing happens when the package has no `./usr/share/doc/<package>`
    directory. For native packages, `changelog.Debian` is renamed to `changelog`
    (or an existing `changelog` is used). Trimming of old entries is disabled
    when `notrimdch` is in the build options; in that case the NEWS file is left
    alone as well.

    :param dctrl: The binary package being processed.
    :param fs_root: Root of the package file system.
    :param is_native: Whether the source package is native.
    :param build_env: Provides the active `DEB_BUILD_OPTIONS`.
    """
    doc_dir = fs_root.lookup(f"./usr/share/doc/{dctrl.name}")
    if not doc_dir:
        return

    changelog = doc_dir.get("changelog.Debian")
    if is_native:
        if changelog:
            changelog.name = "changelog"
        else:
            changelog = doc_dir.get("changelog")

    trim = "notrimdch" not in build_env.deb_build_options

    was_pruned, versions_kept = False, None
    if changelog and changelog.has_fs_path:
        was_pruned, versions_kept = _prune_dch_file(
            dctrl, changelog, True, None, trim=trim
        )

    if not trim:
        return

    # The NEWS file is only reduced when the changelog itself was shortened.
    news_file = doc_dir.get("NEWS.Debian")
    if not (news_file and news_file.has_fs_path and was_pruned):
        return
    _prune_dch_file(dctrl, news_file, False, versions_kept)
def _detect_upstream_changelog(names: Iterable[str]) -> Optional[str]:
    """Pick the name that is the best upstream changelog candidate.

    Names are matched case-insensitively against `_UPSTREAM_CHANGELOG_NAMES`;
    the match with the lowest priority value wins and the first one seen wins a
    tie.

    :param names: Candidate file names.
    :return: The winning name as it was given, or None when nothing matched.
    """
    best_name: Optional[str] = None
    best_priority = None
    for candidate in names:
        priority = _UPSTREAM_CHANGELOG_NAMES.get(candidate.lower())
        if priority is None:
            continue
        # Strictly smaller only, so an earlier candidate is never displaced by an equal one.
        if best_priority is None or priority < best_priority:
            best_name, best_priority = candidate, priority
    return best_name
+ return + upstream_changelog = _detect_upstream_changelog( + p.name for p in bdir.iterdir if p.is_file and p.has_fs_path and p.size > 0 + ) + if upstream_changelog: + p = bdir.lookup(upstream_changelog) + assert p is not None # Mostly as a typing hint + p.name = "changelog" + return + for dirname in _UPSTREAM_CHANGELOG_SOURCE_DIRS: + dir_path = source_fs_root.lookup(dirname) + if not dir_path or not dir_path.is_dir: + continue + changelog_name = _detect_upstream_changelog( + p.name + for p in dir_path.iterdir + if p.is_file and p.has_fs_path and p.size > 0 + ) + if changelog_name: + if bdir is None: + bdir = fs_root.mkdirs(doc_dir) + bdir.insert_file_from_fs_path( + "changelog", + dir_path[changelog_name].fs_path, + ) + break + + +@dataclasses.dataclass(slots=True) +class _ElfInfo: + path: VirtualPath + fs_path: str + is_stripped: Optional[bool] = None + build_id: Optional[str] = None + dbgsym: Optional[FSPath] = None + + +def _elf_static_lib_walk_filter( + fs_path: VirtualPath, + children: List[VP], +) -> bool: + if ( + fs_path.name == ".build-id" + and assume_not_none(fs_path.parent_dir).name == "debug" + ): + children.clear() + return False + # Deal with some special cases, where certain files are not supposed to be stripped in a given directory + if "debug/" in fs_path.path or fs_path.name.endswith("debug/"): + # FIXME: We need a way to opt out of this per #468333/#1016122 + for so_file in (f for f in list(children) if f.name.endswith(".so")): + children.remove(so_file) + if "/guile/" in fs_path.path or fs_path.name == "guile": + for go_file in (f for f in list(children) if f.name.endswith(".go")): + children.remove(go_file) + return True + + +@contextlib.contextmanager +def _all_elf_files(fs_root: VirtualPath) -> Iterator[Dict[str, _ElfInfo]]: + all_elf_files = find_all_elf_files( + fs_root, + walk_filter=_elf_static_lib_walk_filter, + ) + if not all_elf_files: + yield {} + return + with ExitStack() as cm_stack: + resolved = ( + (p, 
def _find_all_static_libs(
    fs_root: FSPath,
) -> Iterator[FSPath]:
    """Yield the files beneath `fs_root` that look like static libraries.

    A candidate must pass `_elf_static_lib_walk_filter`, be a file backed by a
    file system path, not be named `lib*_g.a`, start with an `ar` magic
    (`!<arch>` or `!<thin>`) and contain a NUL byte or `ELF_MAGIC` within the
    next 1 MiB.
    """
    for candidate, children in fs_root.walk():
        # Matching the logic of dh_strip for now.
        if not _elf_static_lib_walk_filter(candidate, children) or not candidate.is_file:
            continue
        if candidate.name.startswith("lib") and candidate.name.endswith("_g.a"):
            # _g.a are historically ignored. I do not remember why, but guessing the "_g" is
            # an encoding of gcc's -g parameter into the filename (with -g meaning "I want debug
            # symbols")
            continue
        if not candidate.has_fs_path:
            continue
        with candidate.open(byte_io=True) as fd:
            if fd.read(8) not in (b"!<arch>\n", b"!<thin>\n"):
                continue
            # Maybe we should see if the first file looks like an index file.
            # Three random .a samples suggests the index file is named "/"
            # Not sure if we should skip past it and then do the ELF check or just assume
            # that "index => static lib".
            head = fd.read(1024 * 1024)
        if b"\0" in head or ELF_MAGIC in head:
            yield candidate
+ with dbgsym_dir.add_file( + dbgsym_basename, + unlink_if_exists=False, + fs_basename_matters=True, + subdir_key="dbgsym-build-ids", + ) as dbgsym: + try: + subprocess.check_call( + [ + objcopy, + "--only-keep-debug", + "--compress-debug-sections", + fs_path, + dbgsym.fs_path, + ] + ) + except subprocess.CalledProcessError: + full_command = ( + f"{objcopy} --only-keep-debug --compress-debug-sections" + f" {escape_shell(fs_path, dbgsym.fs_path)}" + ) + _error( + f"Attempting to create a .debug file failed. Please review the error message from {objcopy} to" + f" understand what went wrong. Full command was: {full_command}" + ) + return dbgsym + + +def _strip_binary(strip: str, options: List[str], paths: Iterable[str]) -> None: + # We assume the paths are obtained via `p.replace_fs_path_content()`, + # which is the case at the time of written and should remain so forever. + it = iter(paths) + first = next(it, None) + if first is None: + return + static_cmd = [strip] + static_cmd.extend(options) + + for cmd in xargs(static_cmd, itertools.chain((first,), (f for f in it))): + _info(f"Removing unnecessary ELF debug info via: {escape_shell(*cmd)}") + try: + subprocess.check_call( + cmd, + stdin=subprocess.DEVNULL, + restore_signals=True, + ) + except subprocess.CalledProcessError: + _error( + f"Attempting to remove ELF debug info failed. Please review the error from {strip} above" + f" understand what went wrong." + ) + + +def _attach_debug(objcopy: str, elf_binary: VirtualPath, dbgsym: FSPath) -> None: + dbgsym_fs_path: str + with dbgsym.replace_fs_path_content() as dbgsym_fs_path: + cmd = [objcopy, "--add-gnu-debuglink", dbgsym_fs_path, elf_binary.fs_path] + print_command(*cmd) + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError: + _error( + f"Attempting to attach ELF debug link to ELF binary failed. Please review the error from {objcopy}" + f" above understand what went wrong." 
+ ) + + +def _run_dwz( + dctrl: BinaryPackage, + dbgsym_fs_root: FSPath, + unstripped_elf_info: List[_ElfInfo], +) -> None: + if not unstripped_elf_info or dctrl.is_udeb: + return + dwz_cmd = ["dwz"] + dwz_ma_dir_name = f"usr/lib/debug/.dwz/{dctrl.deb_multiarch}" + dwz_ma_basename = f"{dctrl.name}.debug" + multifile = f"{dwz_ma_dir_name}/{dwz_ma_basename}" + build_time_multifile = None + if len(unstripped_elf_info) > 1: + fs_content_dir = generated_content_dir() + fd, build_time_multifile = mkstemp(suffix=dwz_ma_basename, dir=fs_content_dir) + os.close(fd) + dwz_cmd.append(f"-m{build_time_multifile}") + dwz_cmd.append(f"-M/{multifile}") + + # TODO: configuration for disabling multi-file and tweaking memory limits + + dwz_cmd.extend(e.fs_path for e in unstripped_elf_info) + + _info(f"Deduplicating ELF debug info via: {escape_shell(*dwz_cmd)}") + try: + subprocess.check_call(dwz_cmd) + except subprocess.CalledProcessError: + _error( + "Attempting to deduplicate ELF info via dwz failed. Please review the output from dwz above" + " to understand what went wrong." 
+ ) + if build_time_multifile is not None and os.stat(build_time_multifile).st_size > 0: + dwz_dir = dbgsym_fs_root.mkdirs(dwz_ma_dir_name) + dwz_dir.insert_file_from_fs_path( + dwz_ma_basename, + build_time_multifile, + mode=0o644, + require_copy_on_write=False, + follow_symlinks=False, + ) + + +def relocate_dwarves_into_dbgsym_packages( + dctrl: BinaryPackage, + package_fs_root: FSPath, + dbgsym_fs_root: VirtualPath, +) -> List[str]: + # FIXME: hardlinks + with _all_static_libs(package_fs_root) as all_static_files: + if all_static_files: + strip = dctrl.cross_command("strip") + _strip_binary( + strip, + [ + "--strip-debug", + "--remove-section=.comment", + "--remove-section=.note", + "--enable-deterministic-archives", + "-R", + ".gnu.lto_*", + "-R", + ".gnu.debuglto_*", + "-N", + "__gnu_lto_slim", + "-N", + "__gnu_lto_v1", + ], + all_static_files, + ) + + with _all_elf_files(package_fs_root) as all_elf_files: + if not all_elf_files: + return [] + objcopy = dctrl.cross_command("objcopy") + strip = dctrl.cross_command("strip") + unstripped_elf_info = list( + e for e in all_elf_files.values() if not e.is_stripped + ) + + _run_dwz(dctrl, dbgsym_fs_root, unstripped_elf_info) + + for elf_info in unstripped_elf_info: + elf_info.dbgsym = _make_debug_file( + objcopy, + elf_info.fs_path, + assume_not_none(elf_info.build_id), + dbgsym_fs_root, + ) + + # Note: When run strip, we do so also on already stripped ELF binaries because that is what debhelper does! 
def run_package_processors(
    manifest: HighLevelManifest,
    package_metadata_context: PackageProcessingContext,
    fs_root: VirtualPath,
) -> None:
    """Run every applicable plugin-provided package processor on a package.

    The processors are taken, in order, from the manifest's plugin-provided
    feature set. Those that do not apply to the binary package are skipped.

    :param manifest: Manifest whose plugin feature set supplies the processors.
    :param package_metadata_context: Context of the package being processed;
      also passed on to each processor.
    :param fs_root: Root of the package's file system, handed to each processor.
    """
    feature_set = manifest.plugin_provided_feature_set
    processors = feature_set.package_processors_in_order()
    target_package = package_metadata_context.binary_package
    for processor in processors:
        if processor.applies_to(target_package):
            processor.run_package_processor(
                fs_root,
                None,
                package_metadata_context,
            )
assume_not_none(binary_package_data.control_output_dir) + fs_root = binary_package_data.fs_root + package_state = manifest.package_state_for(binary_package.name) + related_udeb_package = ( + binary_package_data.package_metadata_context.related_udeb_package + ) + + udeb_package_name = related_udeb_package.name if related_udeb_package else None + ctrl = binary_package_data.ctrl_creator.for_plugin( + debputy_plugin_metadata, + "compute_shlibs", + ) + try: + soname_info_list = compute_shlibs( + binary_package, + control_output_dir, + fs_root, + manifest, + udeb_package_name, + ctrl, + package_state.reserved_packager_provided_files, + combined_shlibs, + ) + except DebputyDpkgGensymbolsError as e: + errors.append(e.message) + else: + if soname_info_list: + if shlibs_dir is None: + shlibs_dir = generated_content_dir( + subdir_key="_shlibs_materialization_dir" + ) + generate_shlib_dirs( + binary_package, + shlibs_dir, + soname_info_list, + shlib_dirs, + ) + if errors: + for error in errors: + _warn(error) + _error("Stopping due to the errors above") + + generated_shlibs_local = None + if combined_shlibs: + if shlibs_dir is None: + shlibs_dir = generated_content_dir(subdir_key="_shlibs_materialization_dir") + generated_shlibs_local = os.path.join(shlibs_dir, "shlibs.local") + with open(generated_shlibs_local, "wt", encoding="utf-8") as fd: + combined_shlibs.write_to(fd) + _info(f"Generated {generated_shlibs_local} for dpkg-shlibdeps") + + for binary_package_data in package_data_table: + binary_package = binary_package_data.binary_package + if binary_package.is_arch_all or not binary_package.should_be_acted_on: + continue + binary_package_data.ctrl_creator.shlibs_details = ( + generated_shlibs_local, + shlib_dirs, + ) + + +def setup_control_files( + binary_package_data: BinaryPackageData, + manifest: HighLevelManifest, + dbgsym_fs_root: VirtualPath, + dbgsym_ids: List[str], + package_metadata_context: PackageProcessingContext, + *, + allow_ctrl_file_management: bool = True, 
+) -> None: + binary_package = package_metadata_context.binary_package + control_output_dir = assume_not_none(binary_package_data.control_output_dir) + fs_root = binary_package_data.fs_root + package_state = manifest.package_state_for(binary_package.name) + + feature_set: PluginProvidedFeatureSet = manifest.plugin_provided_feature_set + metadata_maintscript_detectors = feature_set.metadata_maintscript_detectors + substvars = binary_package_data.substvars + + snippets = STD_CONTROL_SCRIPTS + if binary_package.is_udeb: + # FIXME: Add missing udeb scripts + snippets = ["postinst"] + + if allow_ctrl_file_management: + process_alternatives( + binary_package, + fs_root, + package_state.reserved_packager_provided_files, + package_state.maintscript_snippets, + ) + process_debconf_templates( + binary_package, + package_state.reserved_packager_provided_files, + package_state.maintscript_snippets, + substvars, + control_output_dir, + ) + + for service_manager_details in feature_set.service_managers.values(): + service_registry = ServiceRegistryImpl(service_manager_details) + service_manager_details.service_detector( + fs_root, + service_registry, + package_metadata_context, + ) + + ctrl = binary_package_data.ctrl_creator.for_plugin( + service_manager_details.plugin_metadata, + service_manager_details.service_manager, + ) + service_definitions = service_registry.detected_services + if not service_definitions: + continue + service_manager_details.service_integrator( + service_definitions, + ctrl, + package_metadata_context, + ) + + plugin_detector_definition: MetadataOrMaintscriptDetector + for plugin_detector_definition in itertools.chain.from_iterable( + metadata_maintscript_detectors.values() + ): + if not plugin_detector_definition.applies_to(binary_package): + continue + ctrl = binary_package_data.ctrl_creator.for_plugin( + plugin_detector_definition.plugin_metadata, + plugin_detector_definition.detector_id, + ) + plugin_detector_definition.run_detector( + fs_root, ctrl, 
package_metadata_context + ) + + for script in snippets: + _generate_snippet( + control_output_dir, + script, + package_state.maintscript_snippets, + ) + + else: + if package_state.maintscript_snippets: + for script, snippet_container in package_state.maintscript_snippets.items(): + for snippet in snippet_container.all_snippets(): + source = snippet.definition_source + _error( + f"This integration mode cannot use maintscript snippets" + f' (since dh_installdeb has already been called). However, "{source}" triggered' + f" a snippet for {script}. Please remove the offending definition if it is from" + f" the manifest or file a bug if it is caused by a built-in rule." + ) + + dh_staging_dir = os.path.join("debian", binary_package.name, "DEBIAN") + try: + with os.scandir(dh_staging_dir) as it: + existing_control_files = [ + f.path + for f in it + if f.is_file(follow_symlinks=False) + and f.name not in ("control", "md5sums") + ] + except FileNotFoundError: + existing_control_files = [] + + if existing_control_files: + cmd = ["cp", "-a"] + cmd.extend(existing_control_files) + cmd.append(control_output_dir) + print_command(*cmd) + subprocess.check_call(cmd) + + if binary_package.is_udeb: + _generate_control_files( + binary_package_data.source_package, + binary_package, + package_state, + control_output_dir, + fs_root, + substvars, + # We never built udebs due to #797391, so skip over this information, + # when creating the udeb + None, + None, + ) + return + + generated_triggers = list(binary_package_data.ctrl_creator.generated_triggers()) + if generated_triggers: + if not allow_ctrl_file_management: + for trigger in generated_triggers: + source = f"{trigger.provider.plugin_name}:{trigger.provider_source_id}" + _error( + f"This integration mode must not generate triggers" + f' (since dh_installdeb has already been called). However, "{source}" created' + f" a trigger. 
Please remove the offending definition if it is from" + f" the manifest or file a bug if it is caused by a built-in rule." + ) + + if generated_triggers: + dest_file = os.path.join(control_output_dir, "triggers") + with open(dest_file, "at", encoding="utf-8") as fd: + fd.writelines( + textwrap.dedent( + f"""\ + # Added by {t.provider_source_id} from {t.provider.plugin_name} + {t.dpkg_trigger_type} {t.dpkg_trigger_target} + """ + ) + for t in generated_triggers + ) + os.chmod(fd.fileno(), 0o644) + install_or_generate_conffiles( + binary_package, + control_output_dir, + fs_root, + manifest.debian_dir, + ) + _generate_control_files( + binary_package_data.source_package, + binary_package, + package_state, + control_output_dir, + fs_root, + substvars, + dbgsym_fs_root, + dbgsym_ids, + ) + + +def _generate_snippet( + control_output_dir: str, + script: str, + maintscript_snippets: Dict[str, MaintscriptSnippetContainer], +) -> None: + debputy_snippets = maintscript_snippets.get(script) + if debputy_snippets is None: + return + reverse = script in ("prerm", "postrm") + snippets = [ + debputy_snippets.generate_snippet(reverse=reverse), + debputy_snippets.generate_snippet(snippet_order="service", reverse=reverse), + ] + if reverse: + snippets = reversed(snippets) + full_content = "".join(f"{s}\n" for s in filter(None, snippets)) + if not full_content: + return + filename = os.path.join(control_output_dir, script) + with open(filename, "wt") as fd: + fd.write("#!/bin/sh\nset -e\n\n") + fd.write(full_content) + os.chmod(fd.fileno(), 0o755) + + +def _add_conffiles( + conffiles_dest: str, + conffile_matches: Iterable[VirtualPath], +) -> None: + with open(conffiles_dest, "at") as fd: + for conffile_match in conffile_matches: + conffile = conffile_match.absolute + assert conffile_match.is_file + fd.write(f"{conffile}\n") + if os.stat(conffiles_dest).st_size == 0: + os.unlink(conffiles_dest) + + +def _ensure_base_substvars_defined(substvars: FlushableSubstvars) -> None: + for 
substvar in ("misc:Depends", "misc:Pre-Depends"): + if substvar not in substvars: + substvars[substvar] = "" + + +def _compute_installed_size(fs_root: VirtualPath) -> int: + """Emulate dpkg-gencontrol's code for computing the default Installed-Size""" + size_in_kb = 0 + hard_links = set() + for path in fs_root.all_paths(): + if not path.is_dir and path.has_fs_path: + st = path.stat() + if st.st_nlink > 1: + hl_key = (st.st_dev, st.st_ino) + if hl_key in hard_links: + continue + hard_links.add(hl_key) + path_size = (st.st_size + 1023) // 1024 + elif path.is_symlink: + path_size = (len(path.readlink()) + 1023) // 1024 + else: + path_size = 1 + size_in_kb += path_size + return size_in_kb + + +def _generate_dbgsym_control_file_if_relevant( + binary_package: BinaryPackage, + dbgsym_fs_root: VirtualPath, + dbgsym_root_dir: str, + dbgsym_ids: str, + multi_arch: Optional[str], + extra_common_params: Sequence[str], +) -> None: + section = binary_package.archive_section + component = "" + extra_params = [] + if section is not None and "/" in section and not section.startswith("main/"): + component = section.split("/", 1)[1] + "/" + if multi_arch != "same": + extra_params.append("-UMulti-Arch") + extra_params.append("-UReplaces") + extra_params.append("-UBreaks") + dbgsym_control_dir = os.path.join(dbgsym_root_dir, "DEBIAN") + ensure_dir(dbgsym_control_dir) + # Pass it via cmd-line to make it more visible that we are providing the + # value. It also prevents the dbgsym package from picking up this value. + ctrl_fs_root = FSROOverlay.create_root_dir("DEBIAN", dbgsym_control_dir) + total_size = _compute_installed_size(dbgsym_fs_root) + _compute_installed_size( + ctrl_fs_root + ) + extra_params.append(f"-VInstalled-Size={total_size}") + extra_params.extend(extra_common_params) + + package = binary_package.name + dpkg_cmd = [ + "dpkg-gencontrol", + f"-p{package}", + # FIXME: Support d/<pkg>.changelog at some point. 
+ "-ldebian/changelog", + "-T/dev/null", + f"-P{dbgsym_root_dir}", + f"-DPackage={package}-dbgsym", + "-DDepends=" + package + " (= ${binary:Version})", + f"-DDescription=debug symbols for {package}", + f"-DSection={component}debug", + f"-DBuild-Ids={dbgsym_ids}", + "-UPre-Depends", + "-URecommends", + "-USuggests", + "-UEnhances", + "-UProvides", + "-UEssential", + "-UConflicts", + "-DPriority=optional", + "-UHomepage", + "-UImportant", + "-UBuilt-Using", + "-UStatic-Built-Using", + "-DAuto-Built-Package=debug-symbols", + "-UProtected", + *extra_params, + ] + print_command(*dpkg_cmd) + try: + subprocess.check_call(dpkg_cmd) + except subprocess.CalledProcessError: + _error( + f"Attempting to generate DEBIAN/control file for {package}-dbgsym failed. Please review the output from " + " dpkg-gencontrol above to understand what went wrong." + ) + os.chmod(os.path.join(dbgsym_root_dir, "DEBIAN", "control"), 0o644) + + +def _all_parent_directories_of(directories: Iterable[str]) -> Set[str]: + result = {"."} + for path in directories: + current = os.path.dirname(path) + while current and current not in result: + result.add(current) + current = os.path.dirname(current) + return result + + +def _auto_compute_multi_arch( + binary_package: BinaryPackage, + control_output_dir: str, + fs_root: FSPath, +) -> Optional[str]: + resolved_arch = binary_package.resolved_architecture + if resolved_arch == "all": + return None + if any( + script + for script in ALL_CONTROL_SCRIPTS + if os.path.isfile(os.path.join(control_output_dir, script)) + ): + return None + + resolved_multiarch = binary_package.deb_multiarch + assert resolved_arch != "all" + acceptable_no_descend_paths = { + f"./usr/lib/{resolved_multiarch}", + f"./usr/include/{resolved_multiarch}", + } + acceptable_files = { + f"./usr/share/doc/{binary_package.name}/{basename}" + for basename in ( + "copyright", + "changelog.gz", + "changelog.Debian.gz", + f"changelog.Debian.{resolved_arch}.gz", + "NEWS.Debian", + 
@functools.lru_cache()
def _has_t64_enabled() -> bool:
    """Tell whether ``dpkg-buildflags`` reports the ``time64`` ABI feature as enabled.

    The answer is cached for the lifetime of the process.

    :return: ``True`` if the ``abi`` feature query contains a stanza with
      ``Feature: time64`` and ``Enabled: yes``. ``False`` otherwise, including
      when ``dpkg-buildflags`` is missing or exits with an error.
    """
    query = ["dpkg-buildflags", "--query-features", "abi"]
    try:
        text = subprocess.check_output(query).decode()
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False

    return any(
        stanza.get("Feature") == "time64" and stanza.get("Enabled") == "yes"
        for stanza in Deb822.iter_paragraphs(text)
    )
@functools.lru_cache()
def dpkg_field_list_pkg_dep() -> Sequence[str]:
    """Return the field names printed by ``field_list_pkg_dep`` of ``Dpkg::Control::Fields``.

    The list is obtained by running a perl one-liner and is cached for the
    lifetime of the process. ``_error`` is called when perl cannot be run or
    exits with an error.

    :return: The field names, one per line of the perl output.
    """
    perl_script = r'print "$_\n" for field_list_pkg_dep'
    command = ["perl", "-MDpkg::Control::Fields", "-e", perl_script]
    try:
        raw_fields = subprocess.check_output(command)
    except (FileNotFoundError, subprocess.CalledProcessError):
        _error("Could not run perl -MDpkg::Control::Fields to get a list of fields")
    return raw_fields.decode("utf-8").splitlines()
+ continue + _, field = substvar_name.rsplit(":", 1) + field_lc = field.lower() + if field_lc not in relationship_fields_lc: + continue + substvar_fields[field_lc].append("${" + substvar_name + "}") + if not substvar_fields: + return None + + replacement_stanza = debian.deb822.Deb822(dctrl.fields) + + for field_name in relationship_fields: + field_name_lc = field_name.lower() + addendum = substvar_fields.get(field_name_lc) + if addendum is None: + # No merging required + continue + substvars_part = ", ".join(addendum) + existing_value = replacement_stanza.get(field_name) + + if existing_value is None or existing_value.isspace(): + final_value = substvars_part + else: + existing_value = existing_value.rstrip().rstrip(",") + final_value = f"{existing_value}, {substvars_part}" + replacement_stanza[field_name] = final_value + + tmpdir = generated_content_dir(package=dctrl) + with tempfile.NamedTemporaryFile( + mode="wb", + dir=tmpdir, + suffix="__DEBIAN_control", + delete=False, + ) as fd: + try: + cast("Any", source.fields).dump(fd) + except AttributeError: + debian.deb822.Deb822(source.fields).dump(fd) + fd.write(b"\n") + replacement_stanza.dump(fd) + return fd.name + + +def _generate_control_files( + source_package: SourcePackage, + binary_package: BinaryPackage, + package_state: PackageTransformationDefinition, + control_output_dir: str, + fs_root: FSPath, + substvars: FlushableSubstvars, + dbgsym_root_fs: Optional[VirtualPath], + dbgsym_build_ids: Optional[List[str]], +) -> None: + package = binary_package.name + extra_common_params = [] + extra_params_specific = [] + _ensure_base_substvars_defined(substvars) + if "Installed-Size" not in substvars: + # Pass it via cmd-line to make it more visible that we are providing the + # value. It also prevents the dbgsym package from picking up this value. 
+ ctrl_fs_root = FSROOverlay.create_root_dir("DEBIAN", control_output_dir) + total_size = _compute_installed_size(fs_root) + _compute_installed_size( + ctrl_fs_root + ) + extra_params_specific.append(f"-VInstalled-Size={total_size}") + + ma_value = binary_package.fields.get("Multi-Arch") + if not binary_package.is_udeb and ma_value is None: + ma_value = _auto_compute_multi_arch(binary_package, control_output_dir, fs_root) + if ma_value is not None: + _info( + f'The package "{binary_package.name}" looks like it should be "Multi-Arch: {ma_value}" based' + ' on the contents and there is no explicit "Multi-Arch" field. Setting the Multi-Arch field' + ' accordingly in the binary. If this auto-correction is wrong, please add "Multi-Arch: no" to the' + ' relevant part of "debian/control" to disable this feature.' + ) + extra_params_specific.append(f"-DMulti-Arch={ma_value}") + elif ma_value == "no": + extra_params_specific.append("-UMulti-Arch") + + dbgsym_root_dir = dhe_dbgsym_root_dir(binary_package) + dbgsym_ids = " ".join(dbgsym_build_ids) if dbgsym_build_ids else "" + if package_state.binary_version is not None: + extra_common_params.append(f"-v{package_state.binary_version}") + + _t64_migration_substvar(binary_package, control_output_dir, substvars) + + with substvars.flush() as flushed_substvars: + if dbgsym_root_fs is not None and any( + f for f in dbgsym_root_fs.all_paths() if f.is_file + ): + _generate_dbgsym_control_file_if_relevant( + binary_package, + dbgsym_root_fs, + dbgsym_root_dir, + dbgsym_ids, + ma_value, + extra_common_params, + ) + generate_md5sums_file( + os.path.join(dbgsym_root_dir, "DEBIAN"), + dbgsym_root_fs, + ) + elif dbgsym_ids: + extra_common_params.append(f"-DBuild-Ids={dbgsym_ids}") + + dctrl = _handle_relationship_substvars( + source_package, + binary_package, + substvars, + ) + if dctrl is None: + dctrl = "debian/control" + + ctrl_file = os.path.join(control_output_dir, "control") + dpkg_cmd = [ + "dpkg-gencontrol", + f"-p{package}", + # 
class CannotEmulateExecutableDHConfigFile(Exception):
    """Raised when an executable debhelper config file cannot be emulated.

    The exception is constructed with two positional arguments: the reason
    (a string) followed by the config file concerned.
    """

    def message(self) -> str:
        """Return the reason, i.e. the first constructor argument."""
        reason = self.args[0]
        return cast("str", reason)

    def config_file(self) -> VirtualPath:
        """Return the config file, i.e. the second constructor argument."""
        offending_file = self.args[1]
        return cast("VirtualPath", offending_file)
def read_dbgsym_file(binary_package: BinaryPackage) -> List[str]:
    """Read ``debian/.debhelper/<package>/dbgsym-build-ids`` for the package.

    :param binary_package: The package whose file should be read.
    :return: The whitespace-separated entries of the file, or an empty list
      when the file does not exist.
    """
    build_ids_path = os.path.join(
        "debian",
        ".debhelper",
        binary_package.name,
        "dbgsym-build-ids",
    )
    try:
        build_ids_fd = open(build_ids_path, "rt", encoding="utf-8")
    except FileNotFoundError:
        return []
    with build_ids_fd:
        content = build_ids_fd.read()
    return content.split()
Please either finish the" + " migration first or migrate to debputy later" + ) + + +def _prune_match( + line: str, + match: Optional[Match[str]], + match_mapper: Optional[Callable[[Match[str]], str]] = None, +) -> Tuple[str, Optional[str]]: + if match is None: + return line, None + s, e = match.span() + if match_mapper: + matched_part = match_mapper(match) + else: + matched_part = line[s:e] + # We prune exactly the matched part and assume the regexes leaves behind spaces if they were important. + line = line[:s] + line[e:] + # One special-case, if the match is at the beginning or end, then we can safely discard left + # over whitespace. + return line.strip(), matched_part + + +def dhe_filedoublearray( + config_file: VirtualPath, + substitution: Substitution, + *, + allow_dh_exec_rename: bool = False, +) -> Iterable[DHConfigFileLine]: + with config_file.open() as fd: + is_executable = config_file.is_executable + for line_no, orig_line in enumerate(fd, start=1): + arch_filter = None + build_profile_filter = None + if ( + line_no == 1 + and is_executable + and not orig_line.startswith( + ("#!/usr/bin/dh-exec", "#! 
def dhe_pkgfile(
    debian_dir: VirtualPath,
    binary_package: BinaryPackage,
    basename: str,
    always_fallback_to_packageless_variant: bool = False,
    bug_950723_prefix_matching: bool = False,
) -> Optional[VirtualPath]:
    """Find the packaging file ``basename`` for a binary package.

    ``<package>.<basename>`` is tried first. The package-less ``<basename>`` is
    also tried when the package is the main package or when
    ``always_fallback_to_packageless_variant`` is set. Directories never match.

    :param debian_dir: The directory to look in.
    :param binary_package: The package the file should belong to.
    :param basename: Name of the file without any package prefix.
    :param always_fallback_to_packageless_variant: Also consider the
      package-less name for packages that are not the main package.
    :param bug_950723_prefix_matching: Look for the ``@`` variants instead,
      i.e. ``<package>@.<basename>`` and ``<basename>@``.
    :return: The first matching non-directory entry, or ``None``.
    """
    # TODO: Architecture specific files
    if bug_950723_prefix_matching:
        candidates = [f"{binary_package.name}@.{basename}"]
        packageless_name = f"{basename}@"
    else:
        candidates = [f"{binary_package.name}.{basename}"]
        packageless_name = basename

    if binary_package.is_main_package or always_fallback_to_packageless_variant:
        candidates.append(packageless_name)

    for candidate in candidates:
        entry = debian_dir.get(candidate)
        if entry is None or entry.is_dir:
            continue
        return entry
    return None
def extract_dh_addons_from_control(
    source_paragraph: Mapping[str, str],
    sequences: Set[str],
) -> None:
    """Add the dh sequences requested via ``dh-sequence-*`` build dependencies.

    Each comma-separated clause of the Build-Depends fields is checked; when
    the name at the start of the clause begins with ``dh-sequence-``, the
    remainder of the name is added to ``sequences``.

    :param source_paragraph: The source stanza of ``debian/control``.
    :param sequences: Set that is updated in place with the sequence names.
    """
    prefix = "dh-sequence-"
    build_dep_fields = ("Build-Depends", "Build-Depends-Indep", "Build-Depends-Arch")
    for field_name in build_dep_fields:
        field_value = source_paragraph.get(field_name)
        if not field_value:
            continue

        for raw_clause in field_value.split(","):
            name_match = _DEP_REGEX.match(raw_clause.strip())
            if name_match is None:
                continue
            dependency = name_match.group(1)
            if dependency.startswith(prefix):
                sequences.add(dependency[len(prefix) :])
b/src/debputy/dh_migration/migration.py new file mode 100644 index 0000000..1366f22 --- /dev/null +++ b/src/debputy/dh_migration/migration.py @@ -0,0 +1,344 @@ +import json +import os +import re +import subprocess +from itertools import chain +from typing import Optional, List, Callable, Set + +from debian.deb822 import Deb822 + +from debputy.debhelper_emulation import CannotEmulateExecutableDHConfigFile +from debputy.dh_migration.migrators import MIGRATORS +from debputy.dh_migration.migrators_impl import ( + read_dh_addon_sequences, + MIGRATION_TARGET_DH_DEBPUTY, + MIGRATION_TARGET_DH_DEBPUTY_RRR, +) +from debputy.dh_migration.models import ( + FeatureMigration, + AcceptableMigrationIssues, + UnsupportedFeature, + ConflictingChange, +) +from debputy.highlevel_manifest import HighLevelManifest +from debputy.manifest_parser.exceptions import ManifestParseException +from debputy.plugin.api import VirtualPath +from debputy.util import _error, _warn, _info, escape_shell, assume_not_none + + +def _print_migration_summary( + migrations: List[FeatureMigration], + compat: int, + min_compat_level: int, + required_plugins: Set[str], + requested_plugins: Optional[Set[str]], +) -> None: + warning_count = 0 + + for migration in migrations: + if not migration.anything_to_do: + continue + underline = "-" * len(migration.tagline) + if migration.warnings: + _warn(f"Summary for migration: {migration.tagline}") + _warn(f"-----------------------{underline}") + _warn(" /!\\ ATTENTION /!\\") + warning_count += len(migration.warnings) + for warning in migration.warnings: + _warn(f" * {warning}") + + if compat < min_compat_level: + if warning_count: + _warn("") + _warn("Supported debhelper compat check") + _warn("--------------------------------") + warning_count += 1 + _warn( + f"The migration tool assumes debhelper compat {min_compat_level}+ semantics, but this package" + f" is using compat {compat}. Consider upgrading the package to compat {min_compat_level}" + " first." 
+ ) + + if required_plugins: + if requested_plugins is None: + warning_count += 1 + needed_plugins = ", ".join(f"debputy-plugin-{n}" for n in required_plugins) + if warning_count: + _warn("") + _warn("Missing debputy plugin check") + _warn("----------------------------") + _warn( + f"The migration tool could not read d/control and therefore cannot tell if all the required" + f" plugins have been requested. Please ensure that the package Build-Depends on: {needed_plugins}" + ) + else: + missing_plugins = required_plugins - requested_plugins + if missing_plugins: + warning_count += 1 + needed_plugins = ", ".join( + f"debputy-plugin-{n}" for n in missing_plugins + ) + if warning_count: + _warn("") + _warn("Missing debputy plugin check") + _warn("----------------------------") + _warn( + f"The migration tool asserted that the following `debputy` plugins would be required, which" + f" are not explicitly requested. Please add the following to Build-Depends: {needed_plugins}" + ) + + if warning_count: + _warn("") + _warn( + f"/!\\ Total number of warnings or manual migrations required: {warning_count}" + ) + + +def _dh_compat_level() -> Optional[int]: + try: + res = subprocess.check_output( + ["dh_assistant", "active-compat-level"], stderr=subprocess.DEVNULL + ) + except subprocess.CalledProcessError: + compat = None + else: + try: + compat = json.loads(res)["declared-compat-level"] + except RuntimeError: + compat = None + else: + if not isinstance(compat, int): + compat = None + return compat + + +def _requested_debputy_plugins(debian_dir: VirtualPath) -> Optional[Set[str]]: + ctrl_file = debian_dir.get("control") + if not ctrl_file: + return None + + dep_regex = re.compile("^([a-z0-9][-+.a-z0-9]+)", re.ASCII) + plugins = set() + + with ctrl_file.open() as fd: + ctrl = list(Deb822.iter_paragraphs(fd)) + source_paragraph = ctrl[0] if ctrl else {} + + for f in ("Build-Depends", "Build-Depends-Indep", "Build-Depends-Arch"): + field = source_paragraph.get(f) + if not field: 
def _requested_debputy_plugins(debian_dir: VirtualPath) -> Optional[Set[str]]:
    """Collect the debputy plugins that the source package build-depends on.

    Scans the ``Build-Depends``, ``Build-Depends-Indep`` and
    ``Build-Depends-Arch`` fields of the first paragraph of ``control`` for
    packages named ``debputy-plugin-<name>``.

    :param debian_dir: The directory in which ``control`` is looked up.
    :return: The plugin names with the ``debputy-plugin-`` prefix removed, or
      ``None`` when there is no ``control`` file.
    """
    ctrl_file = debian_dir.get("control")
    if not ctrl_file:
        return None

    plugin_prefix = "debputy-plugin-"
    # Only the leading package name of each comma-separated clause is matched;
    # whatever follows it in the clause is ignored.
    dep_regex = re.compile("^([a-z0-9][-+.a-z0-9]+)", re.ASCII)
    plugins: Set[str] = set()

    with ctrl_file.open() as fd:
        ctrl = list(Deb822.iter_paragraphs(fd))
    source_paragraph = ctrl[0] if ctrl else {}

    for f in ("Build-Depends", "Build-Depends-Indep", "Build-Depends-Arch"):
        field = source_paragraph.get(f)
        if not field:
            continue

        for dep_clause in field.split(","):
            match = dep_regex.match(dep_clause.strip())
            if not match:
                continue
            dep = match.group(1)
            if dep.startswith(plugin_prefix):
                plugins.add(dep[len(plugin_prefix) :])
    return plugins


def _check_migration_target(
    debian_dir: VirtualPath,
    migration_target: Optional[str],
) -> str:
    """Resolve which migration target to use.

    An explicitly requested target wins. Otherwise the target is derived from
    the dh add-on sequences already used by the package.

    :param debian_dir: The directory passed to ``read_dh_addon_sequences``.
    :param migration_target: The explicitly requested target, if any.
    :return: The resolved migration target.
    """
    r = read_dh_addon_sequences(debian_dir)
    if r is None:
        if migration_target is None:
            # NOTE(review): `_error` is expected not to return - confirm.
            _error("debian/control is missing and no migration target was provided")
        # An explicit target was requested but no sequences could be read.
        # Treat that as "no sequences" rather than unpacking `None`.
        all_sequences = set()
    else:
        bd_sequences, dr_sequences = r
        all_sequences = bd_sequences | dr_sequences

    has_zz_debputy = "zz-debputy" in all_sequences or "debputy" in all_sequences
    has_zz_debputy_rrr = "zz-debputy-rrr" in all_sequences
    has_any_existing = has_zz_debputy or has_zz_debputy_rrr

    if migration_target == "dh-sequence-zz-debputy-rrr" and has_zz_debputy:
        _error("Cannot migrate from (zz-)debputy to zz-debputy-rrr")

    if has_zz_debputy_rrr and not has_zz_debputy:
        resolved_migration_target = MIGRATION_TARGET_DH_DEBPUTY_RRR
    else:
        resolved_migration_target = MIGRATION_TARGET_DH_DEBPUTY

    if migration_target is not None:
        resolved_migration_target = migration_target

    if has_any_existing:
        _info(
            f'Using "{resolved_migration_target}" as migration target based on the packaging'
        )
    else:
        _info(f'Using "{resolved_migration_target}" as default migration target.')

    return resolved_migration_target


def migrate_from_dh(
    manifest: HighLevelManifest,
    acceptable_migration_issues: AcceptableMigrationIssues,
    permit_destructive_changes: Optional[bool],
    migration_target: Optional[str],
    manifest_parser_factory: Callable[[str], HighLevelManifest],
) -> None:
    """Run the migrators for the resolved target and report/apply the result.

    :param manifest: The manifest being migrated into; its mutable manifest
      is written out when any migrator changed it.
    :param acceptable_migration_issues: Issue keys the user agreed to have
      downgraded (e.g., ``min-compat-level``).
    :param permit_destructive_changes: ``True`` applies the manifest update,
      renames and removals. ``None`` only reports what would happen and hints
      at ``--apply-changes``. ``False`` reports and performs nothing.
    :param migration_target: Explicitly requested migration target, if any.
    :param manifest_parser_factory: Called with the path of the newly written
      manifest to verify that it can be parsed.
    """
    migrations = []
    compat = _dh_compat_level()
    if compat is None:
        _error(
            'Cannot detect declared compat level (try running "dh_assistant active-compat-level")'
        )

    debian_dir = manifest.debian_dir
    mutable_manifest = assume_not_none(manifest.mutable_manifest)

    resolved_migration_target = _check_migration_target(debian_dir, migration_target)

    try:
        for migrator in MIGRATORS[resolved_migration_target]:
            feature_migration = FeatureMigration(migrator.__name__)
            migrator(
                debian_dir,
                manifest,
                acceptable_migration_issues,
                feature_migration,
                resolved_migration_target,
            )
            migrations.append(feature_migration)
    except CannotEmulateExecutableDHConfigFile as e:
        _error(
            f"Unable to process the executable dh config file {e.config_file().fs_path}: {e.message()}"
        )
    except UnsupportedFeature as e:
        msg = (
            f"Unable to migrate automatically due to missing features in debputy. The feature is:"
            f"\n\n * {e.message}"
        )
        keys = e.issue_keys
        if keys:
            primary_key = keys[0]
            alt_keys = ""
            if len(keys) > 1:
                # Starts with a space so it can be appended directly after the
                # final sentence below.
                alt_keys = (
                    f' Alternatively you can also use one of: {", ".join(keys[1:])}. Please note that some'
                    " of these may cover more cases."
                )
            msg += (
                f"\n\nUse --acceptable-migration-issues={primary_key} to convert this into a warning and try again."
                " However, you should only do that if you believe you can replace the functionality manually"
                f" or the usage is obsolete / can be removed.{alt_keys}"
            )
        _error(msg)
    except ConflictingChange as e:
        _error(
            "The migration tool detected a conflict between data being migrated and data already migrated /"
            " in the existing manifest."
            f"\n\n * {e.message}"
            "\n\nPlease review the situation and resolve the conflict manually."
        )

    # We start on compat 12 for arch:any due to the new dh_makeshlibs and dh_installinit default
    min_compat = 12
    min_compat = max(
        (m.assumed_compat for m in migrations if m.assumed_compat is not None),
        default=min_compat,
    )

    if compat < min_compat and "min-compat-level" not in acceptable_migration_issues:
        # The migration summary special-cases the compat mismatch and warns for us.
        _error(
            f"The migration tool assumes debhelper compat {min_compat} or later but the package is only on"
            f" compat {compat}. This may lead to incorrect result."
            f"\n\nUse --acceptable-migration-issues=min-compat-level to convert this into a warning and"
            f" try again, if you want to continue regardless."
        )

    requested_plugins = _requested_debputy_plugins(debian_dir)
    required_plugins: Set[str] = set()
    required_plugins.update(
        chain.from_iterable(
            m.required_plugins for m in migrations if m.required_plugins
        )
    )

    _print_migration_summary(
        migrations, compat, min_compat, required_plugins, requested_plugins
    )
    migration_count = sum((m.performed_changes for m in migrations), 0)

    if not migration_count:
        _info(
            "debputy was not able to find any (supported) migrations that it could perform for you."
        )
        return

    if any(m.successful_manifest_changes for m in migrations):
        new_manifest_path = manifest.manifest_path + ".new"

        # Explicit encoding so the result does not depend on the build locale.
        with open(new_manifest_path, "w", encoding="utf-8") as fd:
            mutable_manifest.write_to(fd)

        try:
            _info("Verifying the generated manifest")
            manifest_parser_factory(new_manifest_path)
        except ManifestParseException as e:
            raise AssertionError(
                "Could not parse the manifest generated from the migrator"
            ) from e

        if permit_destructive_changes:
            # Keep the previous manifest around as ".orig" before replacing it.
            if os.path.isfile(manifest.manifest_path):
                os.rename(manifest.manifest_path, manifest.manifest_path + ".orig")
            os.rename(new_manifest_path, manifest.manifest_path)
            _info(f"Updated manifest {manifest.manifest_path}")
        else:
            _info(
                f'Created draft manifest "{new_manifest_path}" (rename to "{manifest.manifest_path}"'
                " to activate it)"
            )
    else:
        _info("No manifest changes detected; skipping update of manifest.")

    removals: int = sum((len(m.remove_paths_on_success) for m in migrations), 0)
    renames: int = sum((len(m.rename_paths_on_success) for m in migrations), 0)

    if renames:
        if permit_destructive_changes:
            _info("Paths being renamed:")
        else:
            _info("Migration *would* rename the following paths:")
        for previous_path, new_path in (
            p for m in migrations for p in m.rename_paths_on_success
        ):
            _info(f"    mv {escape_shell(previous_path, new_path)}")

    if removals:
        if permit_destructive_changes:
            _info("Removals:")
        else:
            _info("Migration *would* remove the following files:")
        for path in (p for m in migrations for p in m.remove_paths_on_success):
            _info(f"    rm -f {escape_shell(path)}")

    if permit_destructive_changes is None:
        print()
        _info(
            "If you would like to perform the migration, please re-run with --apply-changes."
        )
    elif permit_destructive_changes:
        # Renames are performed before removals, in the order reported above.
        for previous_path, new_path in (
            p for m in migrations for p in m.rename_paths_on_success
        ):
            os.rename(previous_path, new_path)
        for path in (p for m in migrations for p in m.remove_paths_on_success):
            os.unlink(path)

        print()
        _info("Migrations performed successfully")
        print()
        _info(
            "Remember to validate the resulting binary packages after rebuilding with debputy"
        )
    else:
        print()
        _info("No migrations performed as requested")
MIGRATION_TARGET_DH_DEBPUTY, + MIGRATION_TARGET_DH_DEBPUTY_RRR, +) +from debputy.dh_migration.models import AcceptableMigrationIssues, FeatureMigration +from debputy.highlevel_manifest import HighLevelManifest +from debputy.plugin.api import VirtualPath + +Migrator = Callable[ + [VirtualPath, HighLevelManifest, AcceptableMigrationIssues, FeatureMigration, str], + None, +] + + +MIGRATORS: Mapping[str, List[Migrator]] = { + MIGRATION_TARGET_DH_DEBPUTY_RRR: [ + migrate_dh_hook_targets, + migrate_misspelled_readme_debian_files, + detect_dh_addons_zz_debputy_rrr, + detect_obsolete_substvars, + ], + MIGRATION_TARGET_DH_DEBPUTY: [ + detect_unsupported_zz_debputy_features, + detect_pam_files, + migrate_dh_hook_targets, + migrate_dh_installsystemd_files, + migrate_install_file, + migrate_installdocs_file, + migrate_installexamples_file, + migrate_installman_file, + migrate_installinfo_file, + migrate_misspelled_readme_debian_files, + migrate_doc_base_files, + migrate_links_files, + migrate_maintscript, + migrate_tmpfile, + migrate_lintian_overrides_files, + migrate_bash_completion, + detect_dh_addons, + detect_obsolete_substvars, + # not-installed should go last, so its rules appear after other installations + # It is not perfect, but it is a start. 
+ migrate_not_installed_file, + ], +} diff --git a/src/debputy/dh_migration/migrators_impl.py b/src/debputy/dh_migration/migrators_impl.py new file mode 100644 index 0000000..6613c25 --- /dev/null +++ b/src/debputy/dh_migration/migrators_impl.py @@ -0,0 +1,1706 @@ +import collections +import dataclasses +import json +import os +import re +import subprocess +from typing import ( + Iterable, + Optional, + Tuple, + List, + Set, + Mapping, + Any, + Union, + Callable, + TypeVar, + Dict, +) + +from debian.deb822 import Deb822 + +from debputy.architecture_support import dpkg_architecture_table +from debputy.deb_packaging_support import dpkg_field_list_pkg_dep +from debputy.debhelper_emulation import ( + dhe_filedoublearray, + DHConfigFileLine, + dhe_pkgfile, + parse_drules_for_addons, + extract_dh_addons_from_control, +) +from debputy.dh_migration.models import ( + ConflictingChange, + FeatureMigration, + UnsupportedFeature, + AcceptableMigrationIssues, + DHMigrationSubstitution, +) +from debputy.highlevel_manifest import ( + MutableYAMLSymlink, + HighLevelManifest, + MutableYAMLConffileManagementItem, + AbstractMutableYAMLInstallRule, +) +from debputy.installations import MAN_GUESS_FROM_BASENAME, MAN_GUESS_LANG_FROM_PATH +from debputy.packages import BinaryPackage +from debputy.plugin.api import VirtualPath +from debputy.util import ( + _error, + PKGVERSION_REGEX, + PKGNAME_REGEX, + _normalize_path, + assume_not_none, + has_glob_magic, +) + +MIGRATION_TARGET_DH_DEBPUTY_RRR = "dh-sequence-zz-debputy-rrr" +MIGRATION_TARGET_DH_DEBPUTY = "dh-sequence-zz-debputy" + + +# Align with debputy.py +DH_COMMANDS_REPLACED = { + MIGRATION_TARGET_DH_DEBPUTY_RRR: frozenset( + { + "dh_fixperms", + "dh_gencontrol", + "dh_md5sums", + "dh_builddeb", + } + ), + MIGRATION_TARGET_DH_DEBPUTY: frozenset( + { + "dh_install", + "dh_installdocs", + "dh_installchangelogs", + "dh_installexamples", + "dh_installman", + "dh_installcatalogs", + "dh_installcron", + "dh_installdebconf", + 
"dh_installemacsen", + "dh_installifupdown", + "dh_installinfo", + "dh_installinit", + "dh_installsysusers", + "dh_installtmpfiles", + "dh_installsystemd", + "dh_installsystemduser", + "dh_installmenu", + "dh_installmime", + "dh_installmodules", + "dh_installlogcheck", + "dh_installlogrotate", + "dh_installpam", + "dh_installppp", + "dh_installudev", + "dh_installgsettings", + "dh_installinitramfs", + "dh_installalternatives", + "dh_bugfiles", + "dh_ucf", + "dh_lintian", + "dh_icons", + "dh_usrlocal", + "dh_perl", + "dh_link", + "dh_installwm", + "dh_installxfonts", + "dh_strip_nondeterminism", + "dh_compress", + "dh_fixperms", + "dh_dwz", + "dh_strip", + "dh_makeshlibs", + "dh_shlibdeps", + "dh_missing", + "dh_installdeb", + "dh_gencontrol", + "dh_md5sums", + "dh_builddeb", + } + ), +} + + +@dataclasses.dataclass(frozen=True, slots=True) +class UnsupportedDHConfig: + dh_config_basename: str + dh_tool: str + bug_950723_prefix_matching: bool = False + is_missing_migration: bool = False + + +@dataclasses.dataclass(frozen=True, slots=True) +class DHSequenceMigration: + debputy_plugin: str + remove_dh_sequence: bool = True + must_use_zz_debputy: bool = False + + +UNSUPPORTED_DH_CONFIGS_AND_TOOLS_FOR_ZZ_DEBPUTY = [ + UnsupportedDHConfig("config", "dh_installdebconf"), + UnsupportedDHConfig("templates", "dh_installdebconf"), + UnsupportedDHConfig("emacsen-compat", "dh_installemacsen"), + UnsupportedDHConfig("emacsen-install", "dh_installemacsen"), + UnsupportedDHConfig("emacsen-remove", "dh_installemacsen"), + UnsupportedDHConfig("emacsen-startup", "dh_installemacsen"), + # The `upstart` file should be long dead, but we might as well detect it. 
+ UnsupportedDHConfig("upstart", "dh_installinit"), + # dh_installsystemduser + UnsupportedDHConfig( + "user.path", "dh_installsystemduser", bug_950723_prefix_matching=False + ), + UnsupportedDHConfig( + "user.path", "dh_installsystemduser", bug_950723_prefix_matching=True + ), + UnsupportedDHConfig( + "user.service", "dh_installsystemduser", bug_950723_prefix_matching=False + ), + UnsupportedDHConfig( + "user.service", "dh_installsystemduser", bug_950723_prefix_matching=True + ), + UnsupportedDHConfig( + "user.socket", "dh_installsystemduser", bug_950723_prefix_matching=False + ), + UnsupportedDHConfig( + "user.socket", "dh_installsystemduser", bug_950723_prefix_matching=True + ), + UnsupportedDHConfig( + "user.target", "dh_installsystemduser", bug_950723_prefix_matching=False + ), + UnsupportedDHConfig( + "user.target", "dh_installsystemduser", bug_950723_prefix_matching=True + ), + UnsupportedDHConfig( + "user.timer", "dh_installsystemduser", bug_950723_prefix_matching=False + ), + UnsupportedDHConfig( + "user.timer", "dh_installsystemduser", bug_950723_prefix_matching=True + ), + UnsupportedDHConfig("udev", "dh_installudev"), + UnsupportedDHConfig("menu", "dh_installmenu"), + UnsupportedDHConfig("menu-method", "dh_installmenu"), + UnsupportedDHConfig("ucf", "dh_ucf"), + UnsupportedDHConfig("wm", "dh_installwm"), + UnsupportedDHConfig("triggers", "dh_installdeb"), + UnsupportedDHConfig("postinst", "dh_installdeb"), + UnsupportedDHConfig("postrm", "dh_installdeb"), + UnsupportedDHConfig("preinst", "dh_installdeb"), + UnsupportedDHConfig("prerm", "dh_installdeb"), + UnsupportedDHConfig("menutest", "dh_installdeb"), + UnsupportedDHConfig("isinstallable", "dh_installdeb"), +] +SUPPORTED_DH_ADDONS = frozenset( + { + # debputy's own + "debputy", + "zz-debputy", + # debhelper provided sequences that should work. 
+ "single-binary", + } +) +DH_ADDONS_TO_REMOVE = frozenset( + [ + # Sequences debputy directly replaces + "dwz", + "elf-tools", + "installinitramfs", + "installsysusers", + "doxygen", + # Sequences that are embedded fully into debputy + "bash-completion", + "sodeps", + ] +) +DH_ADDONS_TO_PLUGINS = { + "gnome": DHSequenceMigration( + "gnome", + # The sequence still provides a command for the clean sequence + remove_dh_sequence=False, + must_use_zz_debputy=True, + ), + "numpy3": DHSequenceMigration( + "numpy3", + # The sequence provides (build-time) dependencies that we cannot provide + remove_dh_sequence=False, + must_use_zz_debputy=True, + ), + "perl-openssl": DHSequenceMigration( + "perl-openssl", + # The sequence provides (build-time) dependencies that we cannot provide + remove_dh_sequence=False, + must_use_zz_debputy=True, + ), +} + + +def _dh_config_file( + debian_dir: VirtualPath, + dctrl_bin: BinaryPackage, + basename: str, + helper_name: str, + acceptable_migration_issues: AcceptableMigrationIssues, + feature_migration: FeatureMigration, + manifest: HighLevelManifest, + support_executable_files: bool = False, + allow_dh_exec_rename: bool = False, + pkgfile_lookup: bool = True, + remove_on_migration: bool = True, +) -> Union[Tuple[None, None], Tuple[VirtualPath, Iterable[DHConfigFileLine]]]: + mutable_manifest = assume_not_none(manifest.mutable_manifest) + dh_config_file = ( + dhe_pkgfile(debian_dir, dctrl_bin, basename) + if pkgfile_lookup + else debian_dir.get(basename) + ) + if dh_config_file is None or dh_config_file.is_dir: + return None, None + if dh_config_file.is_executable and not support_executable_files: + primary_key = f"executable-{helper_name}-config" + if ( + primary_key in acceptable_migration_issues + or "any-executable-dh-configs" in acceptable_migration_issues + ): + feature_migration.warn( + f'TODO: MANUAL MIGRATION of executable dh config "{dh_config_file}" is required.' 
+ ) + return None, None + raise UnsupportedFeature( + f"Executable configuration files not supported (found: {dh_config_file}).", + [primary_key, "any-executable-dh-configs"], + ) + + if remove_on_migration: + feature_migration.remove_on_success(dh_config_file.fs_path) + substitution = DHMigrationSubstitution( + dpkg_architecture_table(), + acceptable_migration_issues, + feature_migration, + mutable_manifest, + ) + content = dhe_filedoublearray( + dh_config_file, + substitution, + allow_dh_exec_rename=allow_dh_exec_rename, + ) + return dh_config_file, content + + +def _validate_rm_mv_conffile( + package: str, + config_line: DHConfigFileLine, +) -> Tuple[str, str, Optional[str], Optional[str], Optional[str]]: + cmd, *args = config_line.tokens + if "--" in config_line.tokens: + raise ValueError( + f'The maintscripts file "{config_line.config_file.path}" for {package} includes a "--" in line' + f" {config_line.line_no}. The offending line is: {config_line.original_line}" + ) + if cmd == "rm_conffile": + min_args = 1 + max_args = 3 + else: + min_args = 2 + max_args = 4 + if len(args) > max_args or len(args) < min_args: + raise ValueError( + f'The "{cmd}" command takes at least {min_args} and at most {max_args} arguments. However,' + f' in "{config_line.config_file.path}" line {config_line.line_no} (for {package}), there' + f" are {len(args)} arguments. The offending line is: {config_line.original_line}" + ) + + obsolete_conffile = args[0] + new_conffile = args[1] if cmd == "mv_conffile" else None + prior_version = args[min_args] if len(args) > min_args else None + owning_package = args[min_args + 1] if len(args) > min_args + 1 else None + if not obsolete_conffile.startswith("/"): + raise ValueError( + f'The (old-)conffile parameter for {cmd} must be absolute (i.e., start with "/"). However,' + f' in "{config_line.config_file.path}" line {config_line.line_no} (for {package}), it was specified' + f' as "{obsolete_conffile}". 
The offending line is: {config_line.original_line}' + ) + if new_conffile is not None and not new_conffile.startswith("/"): + raise ValueError( + f'The new-conffile parameter for {cmd} must be absolute (i.e., start with "/"). However,' + f' in "{config_line.config_file.path}" line {config_line.line_no} (for {package}), it was specified' + f' as "{new_conffile}". The offending line is: {config_line.original_line}' + ) + if prior_version is not None and not PKGVERSION_REGEX.fullmatch(prior_version): + raise ValueError( + f"The prior-version parameter for {cmd} must be a valid package version (i.e., match" + f' {PKGVERSION_REGEX}). However, in "{config_line.config_file.path}" line {config_line.line_no}' + f' (for {package}), it was specified as "{prior_version}". The offending line is:' + f" {config_line.original_line}" + ) + if owning_package is not None and not PKGNAME_REGEX.fullmatch(owning_package): + raise ValueError( + f"The package parameter for {cmd} must be a valid package name (i.e., match {PKGNAME_REGEX})." + f' However, in "{config_line.config_file.path}" line {config_line.line_no} (for {package}), it' + f' was specified as "{owning_package}". 
# Heuristic used to tell a real bash completion snippet apart from a
# dh_bash-completion style list of files: a line matching any alternative
# (a `complete` call with an option, a `$(...)` substitution, a `compgen` call
# with an option, or an `if ...; then`) marks the file as a completion snippet.
#
# The last alternative used to end in "then/". The slash looks like a leftover
# regex delimiter and meant that a plain `if ...; then` line never matched.
_BASH_COMPLETION_RE = re.compile(
    r"""
      (^|[|&;])\s*complete.*-[A-Za-z].*
    | \$\(.*\)
    | \s*compgen.*-[A-Za-z].*
    | \s*if.*;.*then
""",
    re.VERBOSE,
)
+ ) + source = dhe_line.tokens[0] + dest_basename = ( + dhe_line.tokens[1] + if len(dhe_line.tokens) > 1 + else os.path.basename(source) + ) + if source.startswith("debian/") and not has_glob_magic(source): + if dctrl_bin.name != dest_basename: + dest_path = ( + f"debian/{dctrl_bin.name}.{dest_basename}.bash-completion" + ) + else: + dest_path = f"debian/{dest_basename}.bash-completion" + feature_migration.rename_on_success(source, dest_path) + elif len(dhe_line.tokens) == 1: + install_dest_sources.append(source) + else: + install_as_rules.append((source, dest_basename)) + + if install_dest_sources: + sources = ( + install_dest_sources + if len(install_dest_sources) > 1 + else install_dest_sources[0] + ) + installations.append( + AbstractMutableYAMLInstallRule.install_dest( + sources=sources, + dest_dir="{{path:BASH_COMPLETION_DIR}}", + into=dctrl_bin.name if not is_single_binary else None, + ) + ) + + for source, dest_basename in install_as_rules: + installations.append( + AbstractMutableYAMLInstallRule.install_as( + source=source, + install_as="{{path:BASH_COMPLETION_DIR}}/" + dest_basename, + into=dctrl_bin.name if not is_single_binary else None, + ) + ) + + +def migrate_dh_installsystemd_files( + debian_dir: VirtualPath, + manifest: HighLevelManifest, + _acceptable_migration_issues: AcceptableMigrationIssues, + feature_migration: FeatureMigration, + _migration_target: str, +) -> None: + feature_migration.tagline = "dh_installsystemd files" + for dctrl_bin in manifest.all_packages: + for stem in [ + "path", + "service", + "socket", + "target", + "timer", + ]: + pkgfile = dhe_pkgfile( + debian_dir, dctrl_bin, stem, bug_950723_prefix_matching=True + ) + if not pkgfile: + continue + if not pkgfile.name.endswith(f".{stem}") or "@." not in pkgfile.name: + raise UnsupportedFeature( + f'Unable to determine the correct name for {pkgfile.fs_path}. 
It should be a ".@{stem}"' + f" file now (foo@.service => foo.@service)" + ) + newname = pkgfile.name.replace("@.", ".") + newname = newname[: -len(stem)] + f"@{stem}" + feature_migration.rename_on_success( + pkgfile.fs_path, os.path.join(debian_dir.fs_path, newname) + ) + + +def migrate_maintscript( + debian_dir: VirtualPath, + manifest: HighLevelManifest, + acceptable_migration_issues: AcceptableMigrationIssues, + feature_migration: FeatureMigration, + _migration_target: str, +) -> None: + feature_migration.tagline = "dh_installdeb files" + mutable_manifest = assume_not_none(manifest.mutable_manifest) + for dctrl_bin in manifest.all_packages: + mainscript_file, content = _dh_config_file( + debian_dir, + dctrl_bin, + "maintscript", + "dh_installdeb", + acceptable_migration_issues, + feature_migration, + manifest, + ) + + if mainscript_file is None: + continue + assert content is not None + + package_definition = mutable_manifest.package(dctrl_bin.name) + conffiles = { + it.obsolete_conffile: it + for it in package_definition.conffile_management_items() + } + seen_conffiles = set() + + for dhe_line in content: + cmd = dhe_line.tokens[0] + if cmd not in {"rm_conffile", "mv_conffile"}: + raise UnsupportedFeature( + f"The dh_installdeb file {mainscript_file.path} contains the (currently)" + f' unsupported command "{cmd}" on line {dhe_line.line_no}' + f' (line: "{dhe_line.original_line}")' + ) + + try: + ( + _, + obsolete_conffile, + new_conffile, + prior_to_version, + owning_package, + ) = _validate_rm_mv_conffile(dctrl_bin.name, dhe_line) + except ValueError as e: + _error( + f"Validation error in {mainscript_file} on line {dhe_line.line_no}. The error was: {e.args[0]}." + ) + + if obsolete_conffile in seen_conffiles: + raise ConflictingChange( + f'The {mainscript_file} file defines actions for "{obsolete_conffile}" twice!' + f" Please ensure that it is defined at most once in that file." 
+ ) + seen_conffiles.add(obsolete_conffile) + + if cmd == "rm_conffile": + item = MutableYAMLConffileManagementItem.rm_conffile( + obsolete_conffile, + prior_to_version, + owning_package, + ) + else: + assert cmd == "mv_conffile" + item = MutableYAMLConffileManagementItem.mv_conffile( + obsolete_conffile, + assume_not_none(new_conffile), + prior_to_version, + owning_package, + ) + + existing_def = conffiles.get(item.obsolete_conffile) + if existing_def is not None: + if not ( + item.command == existing_def.command + and item.new_conffile == existing_def.new_conffile + and item.prior_to_version == existing_def.prior_to_version + and item.owning_package == existing_def.owning_package + ): + raise ConflictingChange( + f"The maintscript defines the action {item.command} for" + f' "{obsolete_conffile}" in {mainscript_file}, but there is another' + f" conffile management definition for same path defined already (in the" + f" existing manifest or an migration e.g., inside {mainscript_file})" + ) + feature_migration.already_present += 1 + continue + + package_definition.add_conffile_management(item) + feature_migration.successful_manifest_changes += 1 + + +@dataclasses.dataclass(slots=True) +class SourcesAndConditional: + dest_dir: Optional[str] = None + sources: List[str] = dataclasses.field(default_factory=list) + conditional: Optional[Union[str, Mapping[str, Any]]] = None + + +def _strip_d_tmp(p: str) -> str: + if p.startswith("debian/tmp/") and len(p) > 11: + return p[11:] + return p + + +def migrate_install_file( + debian_dir: VirtualPath, + manifest: HighLevelManifest, + acceptable_migration_issues: AcceptableMigrationIssues, + feature_migration: FeatureMigration, + _migration_target: str, +) -> None: + feature_migration.tagline = "dh_install config files" + mutable_manifest = assume_not_none(manifest.mutable_manifest) + installations = mutable_manifest.installations(create_if_absent=False) + priority_lines = [] + remaining_install_lines = [] + 
warn_about_fixmes_in_dest_dir = False + + is_single_binary = sum(1 for _ in manifest.all_packages) == 1 + + for dctrl_bin in manifest.all_packages: + install_file, content = _dh_config_file( + debian_dir, + dctrl_bin, + "install", + "dh_install", + acceptable_migration_issues, + feature_migration, + manifest, + support_executable_files=True, + allow_dh_exec_rename=True, + ) + if not install_file or not content: + continue + current_sources = [] + sources_by_destdir: Dict[Tuple[str, Tuple[str, ...]], SourcesAndConditional] = ( + {} + ) + install_as_rules = [] + multi_dest = collections.defaultdict(list) + seen_sources = set() + multi_dest_sources: Set[str] = set() + + for dhe_line in content: + special_rule = None + if "=>" in dhe_line.tokens: + if dhe_line.tokens[0] == "=>" and len(dhe_line.tokens) == 2: + # This rule must be as early as possible to retain the semantics + path = _strip_d_tmp( + _normalize_path(dhe_line.tokens[1], with_prefix=False) + ) + special_rule = AbstractMutableYAMLInstallRule.install_dest( + path, + dctrl_bin.name if not is_single_binary else None, + dest_dir=None, + when=dhe_line.conditional(), + ) + elif len(dhe_line.tokens) != 3: + _error( + f"Validation error in {install_file.path} on line {dhe_line.line_no}. Cannot migrate dh-exec" + ' renames that is not exactly "SOURCE => TARGET" or "=> TARGET".' 
+ ) + else: + install_rule = AbstractMutableYAMLInstallRule.install_as( + _strip_d_tmp( + _normalize_path(dhe_line.tokens[0], with_prefix=False) + ), + _normalize_path(dhe_line.tokens[2], with_prefix=False), + dctrl_bin.name if not is_single_binary else None, + when=dhe_line.conditional(), + ) + install_as_rules.append(install_rule) + else: + if len(dhe_line.tokens) > 1: + sources = list( + _strip_d_tmp(_normalize_path(w, with_prefix=False)) + for w in dhe_line.tokens[:-1] + ) + dest_dir = _normalize_path(dhe_line.tokens[-1], with_prefix=False) + else: + sources = list( + _strip_d_tmp(_normalize_path(w, with_prefix=False)) + for w in dhe_line.tokens + ) + dest_dir = None + + multi_dest_sources.update(s for s in sources if s in seen_sources) + seen_sources.update(sources) + + if dest_dir is None and dhe_line.conditional() is None: + current_sources.extend(sources) + continue + key = (dest_dir, dhe_line.conditional_key()) + md = _fetch_or_create( + sources_by_destdir, + key, + # Use named parameters to avoid warnings about the values possible changing + # in the next iteration. We always resolve the lambda in this iteration, so + # the bug is non-existent. However, that is harder for a linter to prove. 
+ lambda *, dest=dest_dir, dhe=dhe_line: SourcesAndConditional( + dest_dir=dest, + conditional=dhe.conditional(), + ), + ) + md.sources.extend(sources) + + if special_rule: + priority_lines.append(special_rule) + + remaining_install_lines.extend(install_as_rules) + + for md in sources_by_destdir.values(): + if multi_dest_sources: + sources = [s for s in md.sources if s not in multi_dest_sources] + already_installed = (s for s in md.sources if s in multi_dest_sources) + for s in already_installed: + # The sources are ignored, so we can reuse the object as-is + multi_dest[s].append(md) + if not sources: + continue + else: + sources = md.sources + install_rule = AbstractMutableYAMLInstallRule.install_dest( + sources[0] if len(sources) == 1 else sources, + dctrl_bin.name if not is_single_binary else None, + dest_dir=md.dest_dir, + when=md.conditional, + ) + remaining_install_lines.append(install_rule) + + if current_sources: + if multi_dest_sources: + sources = [s for s in current_sources if s not in multi_dest_sources] + already_installed = ( + s for s in current_sources if s in multi_dest_sources + ) + for s in already_installed: + # The sources are ignored, so we can reuse the object as-is + dest_dir = os.path.dirname(s) + if has_glob_magic(dest_dir): + warn_about_fixmes_in_dest_dir = True + dest_dir = f"FIXME: {dest_dir} (could not reliably compute the dest dir)" + multi_dest[s].append( + SourcesAndConditional( + dest_dir=dest_dir, + conditional=None, + ) + ) + else: + sources = current_sources + + if sources: + install_rule = AbstractMutableYAMLInstallRule.install_dest( + sources[0] if len(sources) == 1 else sources, + dctrl_bin.name if not is_single_binary else None, + dest_dir=None, + ) + remaining_install_lines.append(install_rule) + + if multi_dest: + for source, dest_and_conditionals in multi_dest.items(): + dest_dirs = [dac.dest_dir for dac in dest_and_conditionals] + # We assume the conditional is the same. 
def migrate_installdocs_file(
    debian_dir: VirtualPath,
    manifest: HighLevelManifest,
    acceptable_migration_issues: AcceptableMigrationIssues,
    feature_migration: FeatureMigration,
    _migration_target: str,
) -> None:
    """Turn each package's dh_installdocs ``docs`` file into one manifest rule.

    For every package in ``manifest.all_packages`` the ``docs`` config file is
    looked up via ``_dh_config_file``.  The normalized tokens of all its lines
    are gathered into a single ``install_docs`` rule, which is appended to the
    manifest's installations.  Lines with an architecture or build-profile
    filter are reported via ``_error`` because conditions are not supported
    for this file type.

    :param debian_dir: Directory in which the config files are looked up.
    :param manifest: Manifest being migrated into; its mutable manifest must
      not be ``None``.
    :param acceptable_migration_issues: Passed through to ``_dh_config_file``.
    :param feature_migration: Receives the tagline and the change counter.
    :param _migration_target: Unused.
    """
    feature_migration.tagline = "dh_installdocs config files"
    mutable_manifest = assume_not_none(manifest.mutable_manifest)
    installations = mutable_manifest.installations(create_if_absent=False)

    package_count = sum(1 for _ in manifest.all_packages)
    is_single_binary = package_count == 1

    for dctrl_bin in manifest.all_packages:
        install_file, content = _dh_config_file(
            debian_dir,
            dctrl_bin,
            "docs",
            "dh_installdocs",
            acceptable_migration_issues,
            feature_migration,
            manifest,
            support_executable_files=True,
        )
        if not install_file:
            continue
        assert content is not None

        doc_paths: List[str] = []
        for config_line in content:
            has_condition = (
                config_line.arch_filter or config_line.build_profile_filter
            )
            if has_condition:
                _error(
                    f"Unable to migrate line {config_line.line_no} of {install_file.path}."
                    " Missing support for conditions."
                )
            for token in config_line.tokens:
                doc_paths.append(_normalize_path(token, with_prefix=False))

        if not doc_paths:
            continue

        feature_migration.successful_manifest_changes += 1
        # A lone path is passed as a plain string rather than a one-item list.
        rule_sources = doc_paths[0] if len(doc_paths) == 1 else doc_paths
        # The package name is only spelled out when there are several packages.
        rule_package = None if is_single_binary else dctrl_bin.name
        install_rule = AbstractMutableYAMLInstallRule.install_docs(
            rule_sources,
            rule_package,
        )
        installations.create_definition_if_missing()
        installations.append(install_rule)
+ ) + examples.extend( + _normalize_path(w, with_prefix=False) for w in dhe_line.tokens + ) + + if not examples: + continue + feature_migration.successful_manifest_changes += 1 + install_rule = AbstractMutableYAMLInstallRule.install_examples( + examples if len(examples) > 1 else examples[0], + dctrl_bin.name if not is_single_binary else None, + ) + installations.create_definition_if_missing() + installations.append(install_rule) + + +@dataclasses.dataclass(slots=True) +class InfoFilesDefinition: + sources: List[str] = dataclasses.field(default_factory=list) + conditional: Optional[Union[str, Mapping[str, Any]]] = None + + +def migrate_installinfo_file( + debian_dir: VirtualPath, + manifest: HighLevelManifest, + acceptable_migration_issues: AcceptableMigrationIssues, + feature_migration: FeatureMigration, + _migration_target: str, +) -> None: + feature_migration.tagline = "dh_installinfo config files" + mutable_manifest = assume_not_none(manifest.mutable_manifest) + installations = mutable_manifest.installations(create_if_absent=False) + is_single_binary = sum(1 for _ in manifest.all_packages) == 1 + + for dctrl_bin in manifest.all_packages: + info_file, content = _dh_config_file( + debian_dir, + dctrl_bin, + "info", + "dh_installinfo", + acceptable_migration_issues, + feature_migration, + manifest, + support_executable_files=True, + ) + if not info_file: + continue + assert content is not None + info_files_by_condition: Dict[Tuple[str, ...], InfoFilesDefinition] = {} + for dhe_line in content: + key = dhe_line.conditional_key() + info_def = _fetch_or_create( + info_files_by_condition, + key, + lambda: InfoFilesDefinition(conditional=dhe_line.conditional()), + ) + info_def.sources.extend( + _normalize_path(w, with_prefix=False) for w in dhe_line.tokens + ) + + if not info_files_by_condition: + continue + feature_migration.successful_manifest_changes += 1 + installations.create_definition_if_missing() + for info_def in info_files_by_condition.values(): + info_files = 
info_def.sources + install_rule = AbstractMutableYAMLInstallRule.install_docs( + info_files if len(info_files) > 1 else info_files[0], + dctrl_bin.name if not is_single_binary else None, + dest_dir="{{path:GNU_INFO_DIR}}", + when=info_def.conditional, + ) + installations.append(install_rule) + + +@dataclasses.dataclass(slots=True) +class ManpageDefinition: + sources: List[str] = dataclasses.field(default_factory=list) + language: Optional[str] = None + conditional: Optional[Union[str, Mapping[str, Any]]] = None + + +DK = TypeVar("DK") +DV = TypeVar("DV") + + +def _fetch_or_create(d: Dict[DK, DV], key: DK, factory: Callable[[], DV]) -> DV: + v = d.get(key) + if v is None: + v = factory() + d[key] = v + return v + + +def migrate_installman_file( + debian_dir: VirtualPath, + manifest: HighLevelManifest, + acceptable_migration_issues: AcceptableMigrationIssues, + feature_migration: FeatureMigration, + _migration_target: str, +) -> None: + feature_migration.tagline = "dh_installman config files" + mutable_manifest = assume_not_none(manifest.mutable_manifest) + installations = mutable_manifest.installations(create_if_absent=False) + is_single_binary = sum(1 for _ in manifest.all_packages) == 1 + warn_about_basename = False + + for dctrl_bin in manifest.all_packages: + manpages_file, content = _dh_config_file( + debian_dir, + dctrl_bin, + "manpages", + "dh_installman", + acceptable_migration_issues, + feature_migration, + manifest, + support_executable_files=True, + allow_dh_exec_rename=True, + ) + if not manpages_file: + continue + assert content is not None + + vanilla_definitions = [] + install_as_rules = [] + complex_definitions: Dict[ + Tuple[Optional[str], Tuple[str, ...]], ManpageDefinition + ] = {} + install_rule: AbstractMutableYAMLInstallRule + for dhe_line in content: + if "=>" in dhe_line.tokens: + # dh-exec allows renaming features. For `debputy`, we degenerate it into an `install` (w. `as`) feature + # without any of the `install-man` features. 
+ if dhe_line.tokens[0] == "=>" and len(dhe_line.tokens) == 2: + _error( + f'Unsupported "=> DEST" rule for error in {manpages_file.path} on line {dhe_line.line_no}."' + f' Cannot migrate dh-exec renames that is not exactly "SOURCE => TARGET" for d/manpages files.' + ) + elif len(dhe_line.tokens) != 3: + _error( + f"Validation error in {manpages_file.path} on line {dhe_line.line_no}. Cannot migrate dh-exec" + ' renames that is not exactly "SOURCE => TARGET" or "=> TARGET".' + ) + else: + install_rule = AbstractMutableYAMLInstallRule.install_doc_as( + _normalize_path(dhe_line.tokens[0], with_prefix=False), + _normalize_path(dhe_line.tokens[2], with_prefix=False), + dctrl_bin.name if not is_single_binary else None, + when=dhe_line.conditional(), + ) + install_as_rules.append(install_rule) + continue + + sources = [_normalize_path(w, with_prefix=False) for w in dhe_line.tokens] + needs_basename = any( + MAN_GUESS_FROM_BASENAME.search(x) + and not MAN_GUESS_LANG_FROM_PATH.search(x) + for x in sources + ) + if needs_basename or dhe_line.conditional() is not None: + if needs_basename: + warn_about_basename = True + language = "derive-from-basename" + else: + language = None + key = (language, dhe_line.conditional_key()) + manpage_def = _fetch_or_create( + complex_definitions, + key, + lambda: ManpageDefinition( + language=language, conditional=dhe_line.conditional() + ), + ) + manpage_def.sources.extend(sources) + else: + vanilla_definitions.extend(sources) + + if not install_as_rules and not vanilla_definitions and not complex_definitions: + continue + feature_migration.successful_manifest_changes += 1 + installations.create_definition_if_missing() + installations.extend(install_as_rules) + if vanilla_definitions: + man_source = ( + vanilla_definitions + if len(vanilla_definitions) > 1 + else vanilla_definitions[0] + ) + install_rule = AbstractMutableYAMLInstallRule.install_man( + man_source, + dctrl_bin.name if not is_single_binary else None, + None, + ) + 
def migrate_not_installed_file(
    debian_dir: VirtualPath,
    manifest: HighLevelManifest,
    acceptable_migration_issues: AcceptableMigrationIssues,
    feature_migration: FeatureMigration,
    _migration_target: str,
) -> None:
    """Turn dh_missing's ``not-installed`` file into a single ``discard`` rule.

    The file is looked up once, for the main package and with
    ``pkgfile_lookup=False``.  Every token of every line is normalized and the
    resulting paths are handed to ``AbstractMutableYAMLInstallRule.discard``;
    the rule is appended to the manifest's installations.  Nothing is added
    when the file is absent or contains no tokens.

    :param debian_dir: Directory in which the config file is looked up.
    :param manifest: Manifest being migrated into; its mutable manifest must
      not be ``None``.
    :param acceptable_migration_issues: Passed through to ``_dh_config_file``.
    :param feature_migration: Receives the tagline and the change counter.
    :param _migration_target: Unused.
    """
    feature_migration.tagline = "dh_missing's not-installed config file"
    mutable_manifest = assume_not_none(manifest.mutable_manifest)
    installations = mutable_manifest.installations(create_if_absent=False)
    main_candidates = [p for p in manifest.all_packages if p.is_main_package]
    main_binary = main_candidates[0]

    missing_file, content = _dh_config_file(
        debian_dir,
        main_binary,
        "not-installed",
        "dh_missing",
        acceptable_migration_issues,
        feature_migration,
        manifest,
        support_executable_files=False,
        pkgfile_lookup=False,
    )
    if not missing_file:
        return
    assert content is not None

    discard_rules: List[str] = [
        _normalize_path(token, with_prefix=False)
        for config_line in content
        for token in config_line.tokens
    ]
    if not discard_rules:
        return

    feature_migration.successful_manifest_changes += 1
    # A lone path is passed as a plain string rather than a one-item list.
    discarded = discard_rules[0] if len(discard_rules) == 1 else discard_rules
    install_rule = AbstractMutableYAMLInstallRule.discard(
        discarded,
    )
    installations.create_definition_if_missing()
    installations.append(install_rule)
def migrate_tmpfile(
    debian_dir: VirtualPath,
    manifest: HighLevelManifest,
    _acceptable_migration_issues: AcceptableMigrationIssues,
    feature_migration: FeatureMigration,
    _migration_target: str,
) -> None:
    """Schedule renames of ``tmpfile`` config files to the ``tmpfiles`` spelling.

    For each package, the ``tmpfile`` config file is located with
    ``dhe_pkgfile``.  A dotted file name has ``.tmpfile`` replaced by
    ``.tmpfiles``; a name without any dot becomes ``tmpfiles``.  The rename
    itself is delegated to ``_rename_file_if_exists``.

    :param debian_dir: Directory in which the config files are looked up.
    :param manifest: Provides the packages to inspect.
    :param _acceptable_migration_issues: Unused.
    :param feature_migration: Receives the tagline and the scheduled renames.
    :param _migration_target: Unused.
    """
    feature_migration.tagline = "dh_installtmpfiles config files"
    for dctrl_bin in manifest.all_packages:
        dh_config_file = dhe_pkgfile(debian_dir, dctrl_bin, "tmpfile")
        if dh_config_file is None:
            continue
        old_name = dh_config_file.name
        if "." in old_name:
            new_name = old_name.replace(".tmpfile", ".tmpfiles")
        else:
            new_name = "tmpfiles"
        _rename_file_if_exists(
            debian_dir,
            old_name,
            new_name,
            feature_migration,
        )
def migrate_links_files(
    debian_dir: VirtualPath,
    manifest: HighLevelManifest,
    acceptable_migration_issues: AcceptableMigrationIssues,
    feature_migration: FeatureMigration,
    _migration_target: str,
) -> None:
    """Migrate dh_link ``links`` files into per-package symlink definitions.

    Each line must hold exactly two tokens: the link target followed by the
    link path.  Targets are made explicitly absolute.  A link that already
    exists in the manifest with the same target is counted as already present;
    all others are added to the package definition.

    :param debian_dir: Directory in which the config files are looked up.
    :param manifest: Manifest being migrated into; its mutable manifest must
      not be ``None``.
    :param acceptable_migration_issues: Passed through to ``_dh_config_file``.
    :param feature_migration: Receives the tagline and the counters.
    :param _migration_target: Unused.
    :raises UnsupportedFeature: If a line does not have exactly two tokens.
    :raises ConflictingChange: If a link path occurs twice in one file, or is
      already defined in the manifest with a different target.
    """
    feature_migration.tagline = "dh_link files"
    mutable_manifest = assume_not_none(manifest.mutable_manifest)
    for dctrl_bin in manifest.all_packages:
        links_file, content = _dh_config_file(
            debian_dir,
            dctrl_bin,
            "links",
            "dh_link",
            acceptable_migration_issues,
            feature_migration,
            manifest,
            support_executable_files=True,
        )

        if links_file is None:
            continue
        assert content is not None

        package_definition = mutable_manifest.package(dctrl_bin.name)
        # Snapshot of the links the manifest had before this file is processed.
        defined_symlink = {
            symlink.symlink_path: symlink.symlink_target
            for symlink in package_definition.symlinks()
        }

        seen_symlinks: Set[str] = set()

        for dhe_line in content:
            if len(dhe_line.tokens) != 2:
                raise UnsupportedFeature(
                    f"The dh_link file {links_file.fs_path} did not have exactly two paths on line"
                    f' {dhe_line.line_no} (line: "{dhe_line.original_line}")'
                )
            target, source = dhe_line.tokens
            if source in seen_symlinks:
                # According to #934499, this has happened in the wild already
                raise ConflictingChange(
                    f"The {links_file.fs_path} file defines the link path {source} twice! Please ensure"
                    " that it is defined at most once in that file"
                )
            seen_symlinks.add(source)
            # Symlinks in .links are always considered absolute, but you were not required to have a leading slash.
            # However, in the debputy manifest, you can have relative links, so we should ensure it is explicitly
            # absolute.
            if not target.startswith("/"):
                target = "/" + target
            existing_target = defined_symlink.get(source)
            if existing_target is not None:
                if existing_target != target:
                    raise ConflictingChange(
                        f'The symlink "{source}" points to "{target}" in {links_file.fs_path}, but there is'
                        f' another symlink with same path pointing to "{existing_target}" defined'
                        " already (in the existing manifest or a migration e.g., inside"
                        f" {links_file.fs_path})"
                    )
                feature_migration.already_present += 1
                continue
            condition = dhe_line.conditional()
            package_definition.add_symlink(
                MutableYAMLSymlink.new_symlink(
                    source,
                    target,
                    condition,
                )
            )
            feature_migration.successful_manifest_changes += 1
in f.name or f.name.startswith("doc-base.")) + and not f.name.endswith("doc-base.EX") + ) + ] + known_packages = {d.name: d for d in manifest.all_packages} + main_package = [d for d in manifest.all_packages if d.is_main_package][0] + for doc_base_file in possible_effected_doc_base_files: + parts = doc_base_file.name.split(".") + owning_package = known_packages.get(parts[0]) + if owning_package is None: + owning_package = main_package + package_part = None + else: + package_part = parts[0] + parts = parts[1:] + + if not parts or parts[0] != "doc-base": + # Not a doc-base file after all + continue + + if len(parts) > 1: + name_part = ".".join(parts[1:]) + if package_part is None: + # Named files must have a package prefix + package_part = owning_package.name + else: + # No rename needed + continue + + new_basename = ".".join(filter(None, (package_part, name_part, "doc-base"))) + _rename_file_if_exists( + debian_dir, + doc_base_file.name, + new_basename, + feature_migration, + ) + + +def migrate_dh_hook_targets( + debian_dir: VirtualPath, + _: HighLevelManifest, + acceptable_migration_issues: AcceptableMigrationIssues, + feature_migration: FeatureMigration, + migration_target: str, +) -> None: + feature_migration.tagline = "dh hook targets" + source_root = os.path.dirname(debian_dir.fs_path) + if source_root == "": + source_root = "." 
def detect_obsolete_substvars(
    debian_dir: VirtualPath,
    _manifest: HighLevelManifest,
    _acceptable_migration_issues: AcceptableMigrationIssues,
    feature_migration: FeatureMigration,
    _migration_target: str,
) -> None:
    """Warn about relationship substvars in ``debian/control`` that are obsolete.

    A substvar ``${name:Field}`` is considered obsolete when it is used inside
    the relationship field it is named after (compared case-insensitively).
    ``${shlibs:Depends}`` inside ``Pre-Depends`` of a paragraph with
    ``Essential: yes`` is also considered obsolete.

    Two kinds of warnings are recorded per paragraph: fields whose relations
    are all obsolete substvars (the whole field can go), and individual
    obsolete substvars found in fields that also have other content.

    :param debian_dir: Directory holding the ``control`` file.
    :param _manifest: Unused.
    :param _acceptable_migration_issues: Unused.
    :param feature_migration: Receives the tagline and the warnings.
    :param _migration_target: Unused.
    """
    feature_migration.tagline = (
        "Check for obsolete ${foo:var} variables in debian/control"
    )
    ctrl_file = debian_dir.get("control")
    if not ctrl_file:
        feature_migration.warn(
            "Cannot find debian/control. Detection of obsolete substvars could not be performed."
        )
        return
    with ctrl_file.open() as fd:
        ctrl = list(Deb822.iter_paragraphs(fd))

    relationship_fields = dpkg_field_list_pkg_dep()
    relationship_fields_lc = frozenset(x.lower() for x in relationship_fields)

    # The first paragraph is skipped; only the following ones are inspected.
    for p in ctrl[1:]:
        seen_obsolete_relationship_substvars = set()
        obsolete_fields = set()
        is_essential = p.get("Essential") == "yes"
        for df in relationship_fields:
            field_value: Optional[str] = p.get(df)
            if field_value is None:
                continue
            df_lc = df.lower()
            number_of_relations = 0
            obsolete_substvars_in_field = set()
            for d in (d.strip() for d in field_value.strip().split(",")):
                if not d:
                    continue
                number_of_relations += 1
                if not d.startswith("${"):
                    continue
                try:
                    end_idx = d.index("}")
                except ValueError:
                    # Unterminated "${"; not a substvar reference.
                    continue
                substvar_name = d[2:end_idx]
                if ":" not in substvar_name:
                    continue
                # The part after the last ":" names the field the substvar is meant for.
                _, referenced_field = substvar_name.rsplit(":", 1)
                referenced_field_lc = referenced_field.lower()
                if referenced_field_lc not in relationship_fields_lc:
                    continue
                is_obsolete = referenced_field_lc == df_lc
                if (
                    not is_obsolete
                    and is_essential
                    and substvar_name.lower() == "shlibs:depends"
                    and df_lc == "pre-depends"
                ):
                    is_obsolete = True

                if is_obsolete:
                    obsolete_substvars_in_field.add(d)

            if number_of_relations == len(obsolete_substvars_in_field):
                # Nothing but obsolete substvars: the entire field is redundant.
                obsolete_fields.add(df)
            else:
                seen_obsolete_relationship_substvars.update(obsolete_substvars_in_field)

        package = p.get("Package", "(Missing package name!?)")
        if obsolete_fields:
            # Sorted so the warning text is stable between runs.
            fields = ", ".join(sorted(obsolete_fields))
            feature_migration.warn(
                f"The following relationship fields can be removed from {package}: {fields}."
                f"  (The content in them would be applied automatically.)"
            )
        if seen_obsolete_relationship_substvars:
            v = ", ".join(sorted(seen_obsolete_relationship_substvars))
            feature_migration.warn(
                f"The following relationship substitution variables can be removed from {package}: {v}"
            )
+ ) + return + + bd_sequences, dr_sequences = r + + remaining_sequences = bd_sequences | dr_sequences + saw_dh_debputy = "zz-debputy-rrr" in remaining_sequences + + if not saw_dh_debputy: + feature_migration.warn("Missing Build-Depends on dh-sequence-zz-debputy-rrr") + + +def detect_dh_addons( + debian_dir: VirtualPath, + _manifest: HighLevelManifest, + acceptable_migration_issues: AcceptableMigrationIssues, + feature_migration: FeatureMigration, + _migration_target: str, +) -> None: + feature_migration.tagline = "Check for dh-sequence-addons" + r = read_dh_addon_sequences(debian_dir) + if r is None: + feature_migration.warn( + "Cannot find debian/control. Detection of unsupported/missing dh-sequence addon" + " could not be performed. Please ensure the package will Build-Depend on dh-sequence-zz-debputy" + " and not rely on any other debhelper sequence addons except those debputy explicitly supports." + ) + return + + bd_sequences, dr_sequences = r + + remaining_sequences = bd_sequences | dr_sequences + saw_dh_debputy = ( + "debputy" in remaining_sequences or "zz-debputy" in remaining_sequences + ) + saw_zz_debputy = "zz-debputy" in remaining_sequences + must_use_zz_debputy = False + remaining_sequences -= SUPPORTED_DH_ADDONS + for sequence in remaining_sequences & DH_ADDONS_TO_PLUGINS.keys(): + migration = DH_ADDONS_TO_PLUGINS[sequence] + feature_migration.require_plugin(migration.debputy_plugin) + if migration.remove_dh_sequence: + if migration.must_use_zz_debputy: + must_use_zz_debputy = True + if sequence in bd_sequences: + feature_migration.warn( + f"TODO: MANUAL MIGRATION - Remove build-dependency on dh-sequence-{sequence}" + f" (replaced by debputy-plugin-{migration.debputy_plugin})" + ) + else: + feature_migration.warn( + f"TODO: MANUAL MIGRATION - Remove --with {sequence} from dh in d/rules" + f" (replaced by debputy-plugin-{migration.debputy_plugin})" + ) + + remaining_sequences -= DH_ADDONS_TO_PLUGINS.keys() + + alt_key = "unsupported-dh-sequences" + for 
def _rename_file_if_exists(
    debian_dir: VirtualPath,
    source: str,
    dest: str,
    feature_migration: FeatureMigration,
) -> None:
    """Schedule renaming ``source`` to ``dest`` inside ``debian_dir``.

    No file is modified here; the outcome is recorded on ``feature_migration``:

    * ``source`` does not exist: nothing is recorded.
    * ``source`` is not a file: a manual-migration warning.
    * ``dest`` does not exist: the rename is scheduled.
    * ``dest`` exists but is not a file: a manual-migration warning.
    * both are files with differing content: a manual-migration warning.
    * both are files with identical content: the redundant ``source`` is
      scheduled for removal, so that only ``dest`` remains.

    :param debian_dir: Directory containing both names.
    :param source: Current basename of the file.
    :param dest: Desired basename of the file.
    :param feature_migration: Collects warnings and scheduled operations.
    """
    source_path = debian_dir.get(source)
    if source_path is None:
        return
    dest_path = debian_dir.get(dest)
    spath = source_path.path
    dpath = (
        dest_path.path if dest_path is not None else os.path.join(debian_dir.path, dest)
    )

    if not source_path.is_file:
        feature_migration.warn(
            f'TODO: MANUAL MIGRATION - The migration would normally have renamed "{spath}" to "{dpath}".'
            f' However, the migration assumed "{spath}" would be a file and it is not. Therefore, this step'
            " is left as a manual migration."
        )
        return

    if dest_path is None:
        feature_migration.rename_on_success(
            source_path.fs_path,
            os.path.join(debian_dir.fs_path, dest),
        )
        return

    if not dest_path.is_file:
        feature_migration.warn(
            f'TODO: MANUAL MIGRATION - there is a "{spath}" (file) and "{dpath}" (not a file).'
            f' The migration wanted to replace "{spath}" with "{dpath}", but since "{dpath}" is not'
            " a file, this step is left as a manual migration."
        )
        return

    # `cmp -s` is silent and exits non-zero when the files differ (or on error).
    files_differ = (
        subprocess.call(["cmp", "-s", source_path.fs_path, dest_path.fs_path]) != 0
    )
    if files_differ:
        feature_migration.warn(
            f'TODO: MANUAL MIGRATION - there is a "{source_path.path}" and "{dest_path.path}"'
            f" file. Normally these files are for the same package and there would only be one of"
            f" them.  In this case, they both exist but their content differs. Be advised that"
            f' debputy tool will use the "{dest_path.path}".'
        )
    else:
        # Identical content: keep the file under the new name and drop the
        # duplicate under the old one.
        feature_migration.remove_on_success(source_path.fs_path)
class AcceptableMigrationIssues:
    """Membership-only view of the migration issue keys that are accepted.

    ``key in issues`` is true when the key was given explicitly or when the
    special value ``"ALL"`` is among the given values.
    """

    def __init__(self, values: FrozenSet[str]):
        self._values = values

    def __contains__(self, item: str) -> bool:
        if item in self._values:
            return True
        # "ALL" acts as a wildcard that accepts every issue key.
        return "ALL" in self._values
message(self) -> str: + return cast("str", self.args[0]) + + +@dataclasses.dataclass(slots=True) +class FeatureMigration: + tagline: str + successful_manifest_changes: int = 0 + already_present: int = 0 + warnings: List[str] = dataclasses.field(default_factory=list) + remove_paths_on_success: List[str] = dataclasses.field(default_factory=list) + rename_paths_on_success: List[Tuple[str, str]] = dataclasses.field( + default_factory=list + ) + assumed_compat: Optional[int] = None + required_plugins: List[str] = dataclasses.field(default_factory=list) + + def warn(self, msg: str) -> None: + self.warnings.append(msg) + + def rename_on_success(self, source: str, dest: str) -> None: + self.rename_paths_on_success.append((source, dest)) + + def remove_on_success(self, path: str) -> None: + self.remove_paths_on_success.append(path) + + def require_plugin(self, debputy_plugin: str) -> None: + self.required_plugins.append(debputy_plugin) + + @property + def anything_to_do(self) -> bool: + return bool(self.total_changes_involved) + + @property + def performed_changes(self) -> int: + return ( + self.successful_manifest_changes + + len(self.remove_paths_on_success) + + len(self.rename_paths_on_success) + ) + + @property + def total_changes_involved(self) -> int: + return ( + self.successful_manifest_changes + + len(self.warnings) + + len(self.remove_paths_on_success) + + len(self.rename_paths_on_success) + ) + + +class DHMigrationSubstitution(Substitution): + def __init__( + self, + dpkg_arch_table: DpkgArchitectureBuildProcessValuesTable, + acceptable_migration_issues: AcceptableMigrationIssues, + feature_migration: FeatureMigration, + mutable_manifest: MutableYAMLManifest, + ) -> None: + self._acceptable_migration_issues = acceptable_migration_issues + self._dpkg_arch_table = dpkg_arch_table + self._feature_migration = feature_migration + self._mutable_manifest = mutable_manifest + # TODO: load 1:1 variables from the real subst instance (less stuff to keep in sync) + one2one = 
[ + "DEB_SOURCE", + "DEB_VERSION", + "DEB_VERSION_EPOCH_UPSTREAM", + "DEB_VERSION_UPSTREAM_REVISION", + "DEB_VERSION_UPSTREAM", + "SOURCE_DATE_EPOCH", + ] + self._builtin_substs = { + "Tab": "{{token:TAB}}", + "Space": " ", + "Newline": "{{token:NEWLINE}}", + "Dollar": "${}", + } + self._builtin_substs.update((x, "{{" + x + "}}") for x in one2one) + + def _replacement(self, key: str, definition_source: str) -> str: + if key in self._builtin_substs: + return self._builtin_substs[key] + if key in self._dpkg_arch_table: + return "{{" + key + "}}" + if key.startswith("env:"): + if "dh-subst-env" not in self._acceptable_migration_issues: + raise UnsupportedFeature( + "Use of environment based substitution variable {{" + + key + + "}} is not" + f" supported in debputy. The variable was spotted at {definition_source}", + ["dh-subst-env"], + ) + elif "dh-subst-unknown-variable" not in self._acceptable_migration_issues: + raise UnsupportedFeature( + "Unknown substitution variable {{" + + key + + "}}, which does not have a known" + f" counter part in debputy. 
The variable was spotted at {definition_source}", + ["dh-subst-unknown-variable"], + ) + manifest_definitions = self._mutable_manifest.manifest_definitions( + create_if_absent=False + ) + manifest_variables = manifest_definitions.manifest_variables( + create_if_absent=False + ) + if key not in manifest_variables.variables: + manifest_definitions.create_definition_if_missing() + manifest_variables[key] = "TODO: Provide variable value for " + key + self._feature_migration.warn( + "TODO: MANUAL MIGRATION of unresolved substitution variable {{" + + key + + "}} from" + + f" {definition_source}" + ) + self._feature_migration.successful_manifest_changes += 1 + + return "{{" + key + "}}" + + def substitute( + self, + value: str, + definition_source: str, + /, + escape_glob_characters: bool = False, + ) -> str: + if "${" not in value: + return value + replacement = self._apply_substitution( + _DH_VAR_RE, + value, + definition_source, + escape_glob_characters=escape_glob_characters, + ) + return replacement.replace("${}", "$") + + def with_extra_substitutions(self, **extra_substitutions: str) -> "Substitution": + return self diff --git a/src/debputy/elf_util.py b/src/debputy/elf_util.py new file mode 100644 index 0000000..518db37 --- /dev/null +++ b/src/debputy/elf_util.py @@ -0,0 +1,208 @@ +import io +import os +import struct +from typing import List, Optional, Callable, Tuple, Iterable + +from debputy.filesystem_scan import FSPath +from debputy.plugin.api import VirtualPath + +ELF_HEADER_SIZE32 = 136 +ELF_HEADER_SIZE64 = 232 +ELF_MAGIC = b"\x7fELF" +ELF_VERSION = 0x00000001 +ELF_ENDIAN_LE = 0x01 +ELF_ENDIAN_BE = 0x02 +ELF_TYPE_EXECUTABLE = 0x0002 +ELF_TYPE_SHARED_OBJECT = 0x0003 + +ELF_LINKING_TYPE_ANY = None +ELF_LINKING_TYPE_DYNAMIC = True +ELF_LINKING_TYPE_STATIC = False + +ELF_EI_ELFCLASS32 = 1 +ELF_EI_ELFCLASS64 = 2 + +ELF_PT_DYNAMIC = 2 + +ELF_EI_NIDENT = 0x10 + +# ELF header format: +# typedef struct { +# unsigned char e_ident[EI_NIDENT]; # <-- 16 / 0x10 bytes +# 
uint16_t e_type; +# uint16_t e_machine; +# uint32_t e_version; +# ElfN_Addr e_entry; +# ElfN_Off e_phoff; +# ElfN_Off e_shoff; +# uint32_t e_flags; +# uint16_t e_ehsize; +# uint16_t e_phentsize; +# uint16_t e_phnum; +# uint16_t e_shentsize; +# uint16_t e_shnum; +# uint16_t e_shstrndx; +# } ElfN_Ehdr; + + +class IncompleteFileError(RuntimeError): + pass + + +def is_so_or_exec_elf_file( + path: VirtualPath, + *, + assert_linking_type: Optional[bool] = ELF_LINKING_TYPE_ANY, +) -> bool: + is_elf, linking_type = _read_elf_file( + path, + determine_linking_type=assert_linking_type is not None, + ) + return is_elf and ( + assert_linking_type is ELF_LINKING_TYPE_ANY + or assert_linking_type == linking_type + ) + + +def _read_elf_file( + path: VirtualPath, + *, + determine_linking_type: bool = False, +) -> Tuple[bool, Optional[bool]]: + buffer_size = 4096 + fd_buffer = bytearray(buffer_size) + linking_type = None + fd: io.BufferedReader + with path.open(byte_io=True, buffering=io.DEFAULT_BUFFER_SIZE) as fd: + len_elf_header_raw = fd.readinto(fd_buffer) + if ( + not fd_buffer + or len_elf_header_raw < ELF_HEADER_SIZE32 + or not fd_buffer.startswith(ELF_MAGIC) + ): + return False, None + + elf_ei_class = fd_buffer[4] + endian_raw = fd_buffer[5] + if endian_raw == ELF_ENDIAN_LE: + endian = "<" + elif endian_raw == ELF_ENDIAN_BE: + endian = ">" + else: + return False, None + + if elf_ei_class == ELF_EI_ELFCLASS64: + offset_size = "Q" + # We know it needs to be a 64bit ELF, then the header must be + # large enough for that. 
+ if len_elf_header_raw < ELF_HEADER_SIZE64: + return False, None + elif elf_ei_class == ELF_EI_ELFCLASS32: + offset_size = "L" + else: + return False, None + + elf_type, _elf_machine, elf_version = struct.unpack_from( + f"{endian}HHL", fd_buffer, offset=ELF_EI_NIDENT + ) + if elf_version != ELF_VERSION: + return False, None + if elf_type not in (ELF_TYPE_EXECUTABLE, ELF_TYPE_SHARED_OBJECT): + return False, None + + if determine_linking_type: + linking_type = _determine_elf_linking_type( + fd, fd_buffer, endian, offset_size + ) + if linking_type is None: + return False, None + + return True, linking_type + + +def _determine_elf_linking_type(fd, fd_buffer, endian, offset_size) -> Optional[bool]: + # To check the linking, we look for a DYNAMICALLY program header + # In other words, we assume static linking by default. + + linking_type = ELF_LINKING_TYPE_STATIC + # To do that, we need to read a bit more of the ELF header to + # locate the Program header table. + # + # Reading - in order at offset 0x18: + # * e_entry (ignored) + # * e_phoff + # * e_shoff (ignored) + # * e_flags (ignored) + # * e_ehsize (ignored) + # * e_phentsize + # * e_phnum + _, e_phoff, _, _, _, e_phentsize, e_phnum = struct.unpack_from( + f"{endian}{offset_size}{offset_size}{offset_size}LHHH", + fd_buffer, + offset=ELF_EI_NIDENT + 8, + ) + + # man 5 elf suggests that Program headers can be absent. If so, + # e_phnum will be zero - but we assume the same for e_phentsize. + if e_phnum == 0: + return linking_type + + # Program headers must be at least 4 bytes for this code to do + # anything sanely. In practise, it must be larger than that + # as well. Accordingly, at best this is a corrupted ELF file. 
+ if e_phentsize < 4: + return None + + fd.seek(e_phoff, os.SEEK_SET) + unpack_format = f"{endian}L" + try: + for program_header_raw in _read_bytes_iteratively(fd, e_phentsize, e_phnum): + p_type = struct.unpack_from(unpack_format, program_header_raw)[0] + if p_type == ELF_PT_DYNAMIC: + linking_type = ELF_LINKING_TYPE_DYNAMIC + break + except IncompleteFileError: + return None + + return linking_type + + +def _read_bytes_iteratively( + fd: io.BufferedReader, + object_size: int, + object_count: int, +) -> Iterable[bytes]: + total_size = object_size * object_count + bytes_remaining = total_size + # FIXME: improve this to read larger chunks and yield them one-by-one + byte_buffer = bytearray(object_size) + + while bytes_remaining > 0: + n = fd.readinto(byte_buffer) + if n != object_size: + break + bytes_remaining -= n + yield byte_buffer + + if bytes_remaining: + raise IncompleteFileError() + + +def find_all_elf_files( + fs_root: VirtualPath, + *, + walk_filter: Optional[Callable[[VirtualPath, List[VirtualPath]], bool]] = None, + with_linking_type: Optional[bool] = ELF_LINKING_TYPE_ANY, +) -> List[VirtualPath]: + matches: List[VirtualPath] = [] + # FIXME: Implementation detail that fs_root is always `FSPath` and has `.walk()` + assert isinstance(fs_root, FSPath) + for path, children in fs_root.walk(): + if walk_filter is not None and not walk_filter(path, children): + continue + if not path.is_file or path.size < ELF_HEADER_SIZE32: + continue + if not is_so_or_exec_elf_file(path, assert_linking_type=with_linking_type): + continue + matches.append(path) + return matches diff --git a/src/debputy/exceptions.py b/src/debputy/exceptions.py new file mode 100644 index 0000000..a445997 --- /dev/null +++ b/src/debputy/exceptions.py @@ -0,0 +1,90 @@ +from typing import cast, TYPE_CHECKING + +if TYPE_CHECKING: + from debputy.plugin.api.impl_types import DebputyPluginMetadata + + +class DebputyRuntimeError(RuntimeError): + @property + def message(self) -> str: + return 
cast("str", self.args[0]) + + +class DebputySubstitutionError(DebputyRuntimeError): + pass + + +class DebputyManifestVariableRequiresDebianDirError(DebputySubstitutionError): + pass + + +class DebputyDpkgGensymbolsError(DebputyRuntimeError): + pass + + +class SymlinkLoopError(ValueError): + @property + def message(self) -> str: + return cast("str", self.args[0]) + + +class PureVirtualPathError(TypeError): + @property + def message(self) -> str: + return cast("str", self.args[0]) + + +class TestPathWithNonExistentFSPathError(TypeError): + @property + def message(self) -> str: + return cast("str", self.args[0]) + + +class DebputyFSError(DebputyRuntimeError): + pass + + +class DebputyFSIsROError(DebputyFSError): + pass + + +class PluginBaseError(DebputyRuntimeError): + pass + + +class DebputyPluginRuntimeError(PluginBaseError): + pass + + +class PluginNotFoundError(PluginBaseError): + pass + + +class PluginInitializationError(PluginBaseError): + pass + + +class PluginMetadataError(PluginBaseError): + pass + + +class PluginConflictError(PluginBaseError): + @property + def plugin_a(self) -> "DebputyPluginMetadata": + return cast("DebputyPluginMetadata", self.args[1]) + + @property + def plugin_b(self) -> "DebputyPluginMetadata": + return cast("DebputyPluginMetadata", self.args[2]) + + +class PluginAPIViolationError(PluginBaseError): + pass + + +class UnhandledOrUnexpectedErrorFromPluginError(PluginBaseError): + pass + + +class DebputyMetadataAccessError(DebputyPluginRuntimeError): + pass diff --git a/src/debputy/filesystem_scan.py b/src/debputy/filesystem_scan.py new file mode 100644 index 0000000..f7f97c2 --- /dev/null +++ b/src/debputy/filesystem_scan.py @@ -0,0 +1,1921 @@ +import atexit +import contextlib +import dataclasses +import errno +import io +import operator +import os +import stat +import subprocess +import tempfile +import time +from abc import ABC +from contextlib import suppress +from typing import ( + List, + Iterable, + Dict, + Optional, + Tuple, + 
Union, + Iterator, + Mapping, + cast, + Any, + ContextManager, + TextIO, + BinaryIO, + NoReturn, + Type, + Generic, +) +from weakref import ref, ReferenceType + +from debputy.exceptions import ( + PureVirtualPathError, + DebputyFSIsROError, + DebputyMetadataAccessError, + TestPathWithNonExistentFSPathError, + SymlinkLoopError, +) +from debputy.intermediate_manifest import PathType +from debputy.manifest_parser.base_types import ( + ROOT_DEFINITION, + StaticFileSystemOwner, + StaticFileSystemGroup, +) +from debputy.plugin.api.spec import ( + VirtualPath, + PathDef, + PathMetadataReference, + PMT, +) +from debputy.types import VP +from debputy.util import ( + generated_content_dir, + _error, + escape_shell, + assume_not_none, + _normalize_path, +) + +BY_BASENAME = operator.attrgetter("name") + + +class AlwaysEmptyReadOnlyMetadataReference(PathMetadataReference[PMT]): + __slots__ = ("_metadata_type", "_owning_plugin", "_current_plugin") + + def __init__( + self, + owning_plugin: str, + current_plugin: str, + metadata_type: Type[PMT], + ) -> None: + self._owning_plugin = owning_plugin + self._current_plugin = current_plugin + self._metadata_type = metadata_type + + @property + def is_present(self) -> bool: + return False + + @property + def can_read(self) -> bool: + return self._owning_plugin == self._current_plugin + + @property + def can_write(self) -> bool: + return False + + @property + def value(self) -> Optional[PMT]: + if self.can_read: + return None + raise DebputyMetadataAccessError( + f"Cannot read the metadata {self._metadata_type.__name__} owned by" + f" {self._owning_plugin} as the metadata has not been made" + f" readable to the plugin {self._current_plugin}." 
+ ) + + @value.setter + def value(self, new_value: PMT) -> None: + if self._is_owner: + raise DebputyFSIsROError( + f"Cannot set the metadata {self._metadata_type.__name__} as the path is read-only" + ) + raise DebputyMetadataAccessError( + f"Cannot set the metadata {self._metadata_type.__name__} owned by" + f" {self._owning_plugin} as the metadata has not been made" + f" read-write to the plugin {self._current_plugin}." + ) + + @property + def _is_owner(self) -> bool: + return self._owning_plugin == self._current_plugin + + +@dataclasses.dataclass(slots=True) +class PathMetadataValue(Generic[PMT]): + owning_plugin: str + metadata_type: Type[PMT] + value: Optional[PMT] = None + + def can_read_value(self, current_plugin: str) -> bool: + return self.owning_plugin == current_plugin + + def can_write_value(self, current_plugin: str) -> bool: + return self.owning_plugin == current_plugin + + +class PathMetadataReferenceImplementation(PathMetadataReference[PMT]): + __slots__ = ("_owning_path", "_current_plugin", "_path_metadata_value") + + def __init__( + self, + owning_path: VirtualPath, + current_plugin: str, + path_metadata_value: PathMetadataValue[PMT], + ) -> None: + self._owning_path = owning_path + self._current_plugin = current_plugin + self._path_metadata_value = path_metadata_value + + @property + def is_present(self) -> bool: + if not self.can_read: + return False + return self._path_metadata_value.value is not None + + @property + def can_read(self) -> bool: + return self._path_metadata_value.can_read_value(self._current_plugin) + + @property + def can_write(self) -> bool: + if not self._path_metadata_value.can_write_value(self._current_plugin): + return False + owning_path = self._owning_path + return owning_path.is_read_write and not owning_path.is_detached + + @property + def value(self) -> Optional[PMT]: + if self.can_read: + return self._path_metadata_value.value + raise DebputyMetadataAccessError( + f"Cannot read the metadata {self._metadata_type_name} 
owned by" + f" {self._owning_plugin} as the metadata has not been made" + f" readable to the plugin {self._current_plugin}." + ) + + @value.setter + def value(self, new_value: PMT) -> None: + if not self.can_write: + m = "set" if new_value is not None else "delete" + raise DebputyMetadataAccessError( + f"Cannot {m} the metadata {self._metadata_type_name} owned by" + f" {self._owning_plugin} as the metadata has not been made" + f" read-write to the plugin {self._current_plugin}." + ) + owning_path = self._owning_path + if not owning_path.is_read_write: + raise DebputyFSIsROError( + f"Cannot set the metadata {self._metadata_type_name} as the path is read-only" + ) + if owning_path.is_detached: + raise TypeError( + f"Cannot set the metadata {self._metadata_type_name} as the path is detached" + ) + self._path_metadata_value.value = new_value + + @property + def _is_owner(self) -> bool: + return self._owning_plugin == self._current_plugin + + @property + def _owning_plugin(self) -> str: + return self._path_metadata_value.owning_plugin + + @property + def _metadata_type_name(self) -> str: + return self._path_metadata_value.metadata_type.__name__ + + +def _cp_a(source: str, dest: str) -> None: + cmd = ["cp", "-a", source, dest] + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError: + full_command = escape_shell(*cmd) + _error( + f"The attempt to make an internal copy of {escape_shell(source)} failed. Please review the output of cp" + f" above to understand what went wrong. The full command was: {full_command}" + ) + + +def _split_path(path: str) -> Tuple[bool, bool, List[str]]: + must_be_dir = True if path.endswith("/") else False + absolute = False + if path.startswith("/"): + absolute = True + path = "." 
+ path + path_parts = path.rstrip("/").split("/") + if must_be_dir: + path_parts.append(".") + return absolute, must_be_dir, path_parts + + +def _root(path: VP) -> VP: + current = path + while True: + parent = current.parent_dir + if parent is None: + return current + current = parent + + +def _check_fs_path_is_file( + fs_path: str, + unlink_on_error: Optional["FSPath"] = None, +) -> None: + had_issue = False + try: + # FIXME: Check mode, and use the Virtual Path to cache the result as a side-effect + st = os.lstat(fs_path) + except FileNotFoundError: + had_issue = True + else: + if not stat.S_ISREG(st.st_mode) or st.st_nlink > 1: + had_issue = True + if not had_issue: + return + + if unlink_on_error: + with suppress(FileNotFoundError): + os.unlink(fs_path) + raise TypeError( + "The provided FS backing file was deleted, replaced with a non-file entry or it was hard" + " linked to another file. The entry has been disconnected." + ) + + +class CurrentPluginContextManager: + __slots__ = ("_plugin_names",) + + def __init__(self, initial_plugin_name: str) -> None: + self._plugin_names = [initial_plugin_name] + + @property + def current_plugin_name(self) -> str: + return self._plugin_names[-1] + + @contextlib.contextmanager + def change_plugin_context(self, new_plugin_name: str) -> Iterator[str]: + self._plugin_names.append(new_plugin_name) + yield new_plugin_name + self._plugin_names.pop() + + +class VirtualPathBase(VirtualPath, ABC): + __slots__ = () + + def _orphan_safe_path(self) -> str: + return self.path + + def _rw_check(self) -> None: + if not self.is_read_write: + raise DebputyFSIsROError( + f'Attempt to write to "{self._orphan_safe_path()}" failed:' + " Debputy Virtual File system is R/O." 
+ ) + + def lookup(self, path: str) -> Optional["VirtualPathBase"]: + match, missing = self.attempt_lookup(path) + if missing: + return None + return match + + def attempt_lookup(self, path: str) -> Tuple["VirtualPathBase", List[str]]: + if self.is_detached: + raise ValueError( + f'Cannot perform lookup via "{self._orphan_safe_path()}": The path is detached' + ) + absolute, must_be_dir, path_parts = _split_path(path) + current = _root(self) if absolute else self + path_parts.reverse() + link_expansions = set() + while path_parts: + dir_part = path_parts.pop() + if dir_part == ".": + continue + if dir_part == "..": + p = current.parent_dir + if p is None: + raise ValueError(f'The path "{path}" escapes the root dir') + current = p + continue + try: + current = current[dir_part] + except KeyError: + path_parts.append(dir_part) + path_parts.reverse() + if must_be_dir: + path_parts.pop() + return current, path_parts + if current.is_symlink and path_parts: + if current.path in link_expansions: + # This is our loop detection for now. It might have some false positives where you + # could safely resolve the same symlink twice. However, given that this use-case is + # basically none existent in practice for packaging, we just stop here for now. + raise SymlinkLoopError( + f'The path "{path}" traversed the symlink "{current.path}" multiple' + " times. Currently, traversing the same symlink twice is considered" + " a loop by `debputy` even if the path would eventually resolve." + " Consider filing a feature request if you have a benign case that" + " triggers this error." 
+ ) + link_expansions.add(current.path) + link_target = current.readlink() + link_absolute, _, link_path_parts = _split_path(link_target) + if link_absolute: + current = _root(current) + else: + current = assume_not_none(current.parent_dir) + link_path_parts.reverse() + path_parts.extend(link_path_parts) + return current, [] + + def mkdirs(self, path: str) -> "VirtualPath": + current: VirtualPath + current, missing_parts = self.attempt_lookup( + f"{path}/" if not path.endswith("/") else path + ) + if not current.is_dir: + raise ValueError( + f'mkdirs of "{path}" failed: This would require {current.path} to not exist OR be' + " a directory. However, that path exist AND is a not directory." + ) + for missing_part in missing_parts: + assert missing_part not in (".", "..") + current = current.mkdir(missing_part) + return current + + def prune_if_empty_dir(self) -> None: + """Remove this and all (now) empty parent directories + + Same as: `rmdir --ignore-fail-on-non-empty --parents` + + This operation may cause the path (and any of its parent directories) to become "detached" + and therefore unsafe to use in further operations. + """ + self._rw_check() + + if not self.is_dir: + raise TypeError(f"{self._orphan_safe_path()} is not a directory") + if any(self.iterdir): + return + parent_dir = assume_not_none(self.parent_dir) + + # Recursive does not matter; we already know the directory is empty. + self.unlink() + + # Note: The root dir must never be deleted. This works because when delegating it to the root + # directory, its implementation of this method is a no-op. If this is later rewritten to an + # inline loop (rather than recursion), be sure to preserve this feature. 
+ parent_dir.prune_if_empty_dir() + + def _current_plugin(self) -> str: + if self.is_detached: + raise TypeError("Cannot resolve the current plugin; path is detached") + current = self + while True: + next_parent = current.parent_dir + if next_parent is None: + break + current = next_parent + assert current is not None + return cast("FSRootDir", current)._current_plugin() + + +class FSPath(VirtualPathBase, ABC): + __slots__ = ( + "_basename", + "_parent_dir", + "_children", + "_path_cache", + "_parent_path_cache", + "_last_known_parent_path", + "_mode", + "_owner", + "_group", + "_mtime", + "_stat_cache", + "_metadata", + "__weakref__", + ) + + def __init__( + self, + basename: str, + parent: Optional["FSPath"], + children: Optional[Dict[str, "FSPath"]] = None, + initial_mode: Optional[int] = None, + mtime: Optional[float] = None, + stat_cache: Optional[os.stat_result] = None, + ) -> None: + self._basename = basename + self._path_cache: Optional[str] = None + self._parent_path_cache: Optional[str] = None + self._children = children + self._last_known_parent_path: Optional[str] = None + self._mode = initial_mode + self._mtime = mtime + self._stat_cache = stat_cache + self._metadata: Dict[Tuple[str, Type[Any]], PathMetadataValue[Any]] = {} + self._owner = ROOT_DEFINITION + self._group = ROOT_DEFINITION + + # The self._parent_dir = None is to create `_parent_dir` because the parent_dir setter calls + # is_orphaned, which assumes self._parent_dir is an attribute. 
+ self._parent_dir: Optional[ReferenceType["FSPath"]] = None + if parent is not None: + self.parent_dir = parent + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}({self._orphan_safe_path()!r}," + f" is_file={self.is_file}," + f" is_dir={self.is_dir}," + f" is_symlink={self.is_symlink}," + f" has_fs_path={self.has_fs_path}," + f" children_len={len(self._children) if self._children else 0})" + ) + + @property + def name(self) -> str: + return self._basename + + @name.setter + def name(self, new_name: str) -> None: + self._rw_check() + if new_name == self._basename: + return + if self.is_detached: + self._basename = new_name + return + self._rw_check() + parent = self.parent_dir + # This little parent_dir dance ensures the parent dir detects the rename properly + self.parent_dir = None + self._basename = new_name + self.parent_dir = parent + + @property + def iterdir(self) -> Iterable["FSPath"]: + if self._children is not None: + yield from self._children.values() + + def all_paths(self) -> Iterable["FSPath"]: + yield self + if not self.is_dir: + return + by_basename = BY_BASENAME + stack = sorted(self.iterdir, key=by_basename, reverse=True) + while stack: + current = stack.pop() + yield current + if current.is_dir and not current.is_detached: + stack.extend(sorted(current.iterdir, key=by_basename, reverse=True)) + + def walk(self) -> Iterable[Tuple["FSPath", List["FSPath"]]]: + # FIXME: can this be more "os.walk"-like without making it harder to implement? + if not self.is_dir: + yield self, [] + return + by_basename = BY_BASENAME + stack = [self] + while stack: + current = stack.pop() + children = sorted(current.iterdir, key=by_basename) + assert not children or current.is_dir + yield current, children + # Removing the directory counts as discarding the children. 
+ if not current.is_detached: + stack.extend(reversed(children)) + + def _orphan_safe_path(self) -> str: + if not self.is_detached or self._last_known_parent_path is not None: + return self.path + return f"<orphaned>/{self.name}" + + @property + def is_detached(self) -> bool: + parent = self._parent_dir + if parent is None: + return True + resolved_parent = parent() + if resolved_parent is None: + return True + return resolved_parent.is_detached + + # The __getitem__ behaves like __getitem__ from Dict but __iter__ would ideally work like a Sequence. + # However, that does not feel compatible, so lets force people to use .children instead for the Sequence + # behaviour to avoid surprises for now. + # (Maybe it is a non-issue, but it is easier to add the API later than to remove it once we have committed + # to using it) + __iter__ = None + + def __getitem__(self, key) -> "FSPath": + if self._children is None: + raise KeyError( + f"{key} (note: {self._orphan_safe_path()!r} has no children)" + ) + if isinstance(key, FSPath): + key = key.name + return self._children[key] + + def __delitem__(self, key) -> None: + self._rw_check() + children = self._children + if children is None: + raise KeyError(key) + del children[key] + + def get(self, key: str) -> "Optional[FSPath]": + try: + return self[key] + except KeyError: + return None + + def __contains__(self, item: object) -> bool: + if isinstance(item, VirtualPath): + return item.parent_dir is self + if not isinstance(item, str): + return False + m = self.get(item) + return m is not None + + def _add_child(self, child: "FSPath") -> None: + self._rw_check() + if not self.is_dir: + raise TypeError(f"{self._orphan_safe_path()!r} is not a directory") + if self._children is None: + self._children = {} + + conflict_child = self.get(child.name) + if conflict_child is not None: + conflict_child.unlink(recursive=True) + self._children[child.name] = child + + @property + def tar_path(self) -> str: + path = self.path + if 
self.is_dir: + return path + "/" + return path + + @property + def path(self) -> str: + parent_path = self.parent_dir_path + if ( + self._parent_path_cache is not None + and self._parent_path_cache == parent_path + ): + return assume_not_none(self._path_cache) + if parent_path is None: + raise ReferenceError( + f"The path {self.name} is detached! {self.__class__.__name__}" + ) + self._parent_path_cache = parent_path + ret = os.path.join(parent_path, self.name) + self._path_cache = ret + return ret + + @property + def parent_dir(self) -> Optional["FSPath"]: + p_ref = self._parent_dir + p = p_ref() if p_ref is not None else None + if p is None: + raise ReferenceError( + f"The path {self.name} is detached! {self.__class__.__name__}" + ) + return p + + @parent_dir.setter + def parent_dir(self, new_parent: Optional["FSPath"]) -> None: + self._rw_check() + if new_parent is not None: + if not new_parent.is_dir: + raise ValueError( + f"The parent {new_parent._orphan_safe_path()} must be a directory" + ) + new_parent._rw_check() + old_parent = None + self._last_known_parent_path = None + if not self.is_detached: + old_parent = self.parent_dir + old_parent_children = assume_not_none(assume_not_none(old_parent)._children) + del old_parent_children[self.name] + if new_parent is not None: + self._parent_dir = ref(new_parent) + new_parent._add_child(self) + else: + if old_parent is not None and not old_parent.is_detached: + self._last_known_parent_path = old_parent.path + self._parent_dir = None + self._parent_path_cache = None + + @property + def parent_dir_path(self) -> Optional[str]: + if self.is_detached: + return self._last_known_parent_path + return assume_not_none(self.parent_dir).path + + def chown( + self, + owner: Optional[StaticFileSystemOwner], + group: Optional[StaticFileSystemGroup], + ) -> None: + """Change the owner/group of this path + + :param owner: The desired owner definition for this path. If None, then no change of owner is performed. 
+ :param group: The desired group definition for this path. If None, then no change of group is performed. + """ + self._rw_check() + + if owner is not None: + self._owner = owner.ownership_definition + if group is not None: + self._group = group.ownership_definition + + def stat(self) -> os.stat_result: + st = self._stat_cache + if st is None: + st = self._uncached_stat() + self._stat_cache = st + return st + + def _uncached_stat(self) -> os.stat_result: + return os.lstat(self.fs_path) + + @property + def mode(self) -> int: + current_mode = self._mode + if current_mode is None: + current_mode = stat.S_IMODE(self.stat().st_mode) + self._mode = current_mode + return current_mode + + @mode.setter + def mode(self, new_mode: int) -> None: + self._rw_check() + min_bit = 0o500 if self.is_dir else 0o400 + if (new_mode & min_bit) != min_bit: + omode = oct(new_mode)[2:] + omin = oct(min_bit)[2:] + raise ValueError( + f'Attempt to set mode of path "{self._orphan_safe_path()}" to {omode} rejected;' + f" Minimum requirements are {omin} (read-bit and, for dirs, exec bit for user)." + " There are no paths that do not need these requirements met and they can cause" + " problems during build or on the final system." 
+ ) + self._mode = new_mode + + @property + def mtime(self) -> float: + mtime = self._mtime + if mtime is None: + mtime = self.stat().st_mtime + self._mtime = mtime + return mtime + + @mtime.setter + def mtime(self, new_mtime: float) -> None: + self._rw_check() + self._mtime = new_mtime + + @property + def tar_owner_info(self) -> Tuple[str, int, str, int]: + owner = self._owner + group = self._group + return ( + owner.entity_name, + owner.entity_id, + group.entity_name, + group.entity_id, + ) + + @property + def _can_replace_inline(self) -> bool: + return False + + @contextlib.contextmanager + def add_file( + self, + name: str, + *, + unlink_if_exists: bool = True, + use_fs_path_mode: bool = False, + mode: int = 0o0644, + mtime: Optional[float] = None, + # Special-case parameters that are not exposed in the API + fs_basename_matters: bool = False, + subdir_key: Optional[str] = None, + ) -> Iterator["FSPath"]: + if "/" in name or name in {".", ".."}: + raise ValueError(f'Invalid file name: "{name}"') + if not self.is_dir: + raise TypeError( + f"Cannot create {self._orphan_safe_path()}/{name}:" + f" {self._orphan_safe_path()} is not a directory" + ) + self._rw_check() + existing = self.get(name) + if existing is not None: + if not unlink_if_exists: + raise ValueError( + f'The path "{self._orphan_safe_path()}" already contains a file called "{name}"' + f" and exist_ok was False" + ) + existing.unlink(recursive=False) + + if fs_basename_matters and subdir_key is None: + raise ValueError( + "When fs_basename_matters is True, a subdir_key must be provided" + ) + + directory = generated_content_dir(subdir_key=subdir_key) + + if fs_basename_matters: + fs_path = os.path.join(directory, name) + with open(fs_path, "xb") as _: + # Ensure that the fs_path exists + pass + child = FSBackedFilePath( + name, + self, + fs_path, + replaceable_inline=True, + mtime=mtime, + ) + yield child + else: + with tempfile.NamedTemporaryFile( + dir=directory, suffix=f"__{name}", delete=False + ) 
as fd: + fs_path = fd.name + child = FSBackedFilePath( + name, + self, + fs_path, + replaceable_inline=True, + mtime=mtime, + ) + fd.close() + yield child + + if use_fs_path_mode: + # Ensure the caller can see the current mode + os.chmod(fs_path, mode) + _check_fs_path_is_file(fs_path, unlink_on_error=child) + child._reset_caches() + if not use_fs_path_mode: + child.mode = mode + + def insert_file_from_fs_path( + self, + name: str, + fs_path: str, + *, + exist_ok: bool = True, + use_fs_path_mode: bool = False, + mode: int = 0o0644, + require_copy_on_write: bool = True, + follow_symlinks: bool = True, + reference_path: Optional[VirtualPath] = None, + ) -> "FSPath": + if "/" in name or name in {".", ".."}: + raise ValueError(f'Invalid file name: "{name}"') + if not self.is_dir: + raise TypeError( + f"Cannot create {self._orphan_safe_path()}/{name}:" + f" {self._orphan_safe_path()} is not a directory" + ) + self._rw_check() + if name in self and not exist_ok: + raise ValueError( + f'The path "{self._orphan_safe_path()}" already contains a file called "{name}"' + f" and exist_ok was False" + ) + new_fs_path = fs_path + if follow_symlinks: + if reference_path is not None: + raise ValueError( + "The reference_path cannot be used with follow_symlinks" + ) + new_fs_path = os.path.realpath(new_fs_path, strict=True) + + fmode: Optional[int] = mode + if use_fs_path_mode: + fmode = None + + st = None + if reference_path is None: + st = os.lstat(new_fs_path) + if stat.S_ISDIR(st.st_mode): + raise ValueError( + f'The provided path "{fs_path}" is a directory. However, this' + " method does not support directories" + ) + + if not stat.S_ISREG(st.st_mode): + if follow_symlinks: + raise ValueError( + f"The resolved fs_path ({new_fs_path}) was not a file." 
+ ) + raise ValueError(f"The provided fs_path ({fs_path}) was not a file.") + return FSBackedFilePath( + name, + self, + new_fs_path, + initial_mode=fmode, + stat_cache=st, + replaceable_inline=not require_copy_on_write, + reference_path=reference_path, + ) + + def add_symlink( + self, + link_name: str, + link_target: str, + *, + reference_path: Optional[VirtualPath] = None, + ) -> "FSPath": + if "/" in link_name or link_name in {".", ".."}: + raise ValueError( + f'Invalid file name: "{link_name}" (it must be a valid basename)' + ) + if not self.is_dir: + raise TypeError( + f"Cannot create {self._orphan_safe_path()}/{link_name}:" + f" {self._orphan_safe_path()} is not a directory" + ) + self._rw_check() + + existing = self.get(link_name) + if existing: + # Emulate ln -sf with attempts a non-recursive unlink first. + existing.unlink(recursive=False) + + return SymlinkVirtualPath( + link_name, + self, + link_target, + reference_path=reference_path, + ) + + def mkdir( + self, + name: str, + *, + reference_path: Optional[VirtualPath] = None, + ) -> "FSPath": + if "/" in name or name in {".", ".."}: + raise ValueError( + f'Invalid file name: "{name}" (it must be a valid basename)' + ) + if not self.is_dir: + raise TypeError( + f"Cannot create {self._orphan_safe_path()}/{name}:" + f" {self._orphan_safe_path()} is not a directory" + ) + if reference_path is not None and not reference_path.is_dir: + raise ValueError( + f'The provided fs_path "{reference_path.fs_path}" exist but it is not a directory!' + ) + self._rw_check() + + existing = self.get(name) + if existing: + raise ValueError(f"Path {existing.path} already exist") + return VirtualDirectoryFSPath(name, self, reference_path=reference_path) + + def mkdirs(self, path: str) -> "FSPath": + return cast("FSPath", super().mkdirs(path)) + + @property + def is_read_write(self) -> bool: + """When true, the file system entry may be mutated + + :return: Whether file system mutations are permitted. 
+ """ + if self.is_detached: + return True + return assume_not_none(self.parent_dir).is_read_write + + def unlink(self, *, recursive: bool = False) -> None: + """Unlink a file or a directory + + This operation will detach the path from the file system (causing "is_detached" to return True). + + Note that the root directory cannot be deleted. + + :param recursive: If True, then non-empty directories will be unlinked as well removing everything inside them + as well. When False, an error is raised if the path is a non-empty directory + """ + if self.is_detached: + return + if not recursive and any(self.iterdir): + raise ValueError( + f'Refusing to unlink "{self.path}": The directory was not empty and recursive was False' + ) + # The .parent_dir setter does a _rw_check() for us. + self.parent_dir = None + + def _reset_caches(self) -> None: + self._mtime = None + self._stat_cache = None + + def metadata( + self, + metadata_type: Type[PMT], + *, + owning_plugin: Optional[str] = None, + ) -> PathMetadataReference[PMT]: + current_plugin = self._current_plugin() + if owning_plugin is None: + owning_plugin = current_plugin + metadata_key = (owning_plugin, metadata_type) + metadata_value = self._metadata.get(metadata_key) + if metadata_value is None: + if self.is_detached: + raise TypeError( + f"Cannot access the metadata {metadata_type.__name__}: The path is detached." 
class VirtualFSPathBase(FSPath, ABC):
    """Base class for paths that are virtual unless a subclass says otherwise.

    By default such a path has no file system backing (`has_fs_path` is False),
    so `stat()` and `fs_path` raise `PureVirtualPathError`.
    """

    __slots__ = ()

    def __init__(
        self,
        basename: str,
        parent: Optional["FSPath"],
        children: Optional[Dict[str, "FSPath"]] = None,
        initial_mode: Optional[int] = None,
        mtime: Optional[float] = None,
        stat_cache: Optional[os.stat_result] = None,
    ) -> None:
        super().__init__(
            basename,
            parent,
            children,
            initial_mode=initial_mode,
            mtime=mtime,
            stat_cache=stat_cache,
        )

    @property
    def mtime(self) -> float:
        """The mtime of the path; defaults to "now" on first access and is then cached."""
        mtime = self._mtime
        if mtime is None:
            mtime = time.time()
            self._mtime = mtime
        return mtime

    @property
    def has_fs_path(self) -> bool:
        return False

    def stat(self) -> os.stat_result:
        """Delegate to the parent implementation when the path is file system backed.

        :raises PureVirtualPathError: If the path is purely virtual.
        """
        if not self.has_fs_path:
            raise PureVirtualPathError(
                "stat() is only applicable to paths backed by the file system. The path"
                f" {self._orphan_safe_path()!r} is purely virtual"
            )
        return super().stat()

    @property
    def fs_path(self) -> str:
        """The backing file system path.

        :raises PureVirtualPathError: If the path is purely virtual.
        :raises NotImplementedError: If a subclass reports `has_fs_path` without
          overriding this property.
        """
        if not self.has_fs_path:
            raise PureVirtualPathError(
                "fs_path is only applicable to paths backed by the file system. The path"
                f" {self._orphan_safe_path()!r} is purely virtual"
            )
        # This used to `return self.fs_path`, i.e. unbounded recursion ending in a
        # RecursionError. A subclass that claims to have an fs_path must provide it.
        raise NotImplementedError(
            f"{self.__class__.__name__} reports has_fs_path but does not implement fs_path"
        )
The path" + f" {self._orphan_safe_path()!r} is purely virtual" + ) + return os.stat(self.fs_path) + + @property + def fs_path(self) -> str: + if not self.has_fs_path: + raise PureVirtualPathError( + "fs_path is only applicable to paths backed by the file system. The path" + f" {self._orphan_safe_path()!r} is purely virtual" + ) + return assume_not_none(self._fs_path) + + @property + def is_read_write(self) -> bool: + return self._fs_read_write + + @is_read_write.setter + def is_read_write(self, new_value: bool) -> None: + self._fs_read_write = new_value + + def prune_if_empty_dir(self) -> None: + # No-op for the root directory. There is never a case where you want to delete this directory + # (and even if you could, debputy will need it for technical reasons, so the root dir stays) + return + + def unlink(self, *, recursive: bool = False) -> None: + # There is never a case where you want to delete this directory (and even if you could, + # debputy will need it for technical reasons, so the root dir stays) + raise TypeError("Cannot delete the root directory") + + def _current_plugin(self) -> str: + return self._plugin_context.current_plugin_name + + @contextlib.contextmanager + def change_plugin_context(self, new_plugin: str) -> Iterator[str]: + with self._plugin_context.change_plugin_context(new_plugin) as r: + yield r + + +class VirtualPathWithReference(VirtualFSPathBase, ABC): + __slots__ = ("_reference_path",) + + def __init__( + self, + basename: str, + parent: FSPath, + *, + default_mode: int, + reference_path: Optional[VirtualPath] = None, + ) -> None: + super().__init__( + basename, + parent=parent, + initial_mode=reference_path.mode if reference_path else default_mode, + ) + self._reference_path = reference_path + + @property + def has_fs_path(self) -> bool: + ref_path = self._reference_path + return ref_path is not None and ref_path.has_fs_path + + @property + def mtime(self) -> float: + mtime = self._mtime + if mtime is None: + ref_path = 
class VirtualDirectoryFSPath(VirtualPathWithReference):
    """A directory entry in the virtual file system.

    When a `reference_path` is given, it must itself be a directory.
    """

    # `_reference_path` is already a slot of VirtualPathWithReference. Declaring
    # it again here would only create a second, shadowing slot (the Python data
    # model documents redefining a base class slot as undefined behavior).
    __slots__ = ()

    def __init__(
        self,
        basename: str,
        parent: FSPath,
        *,
        reference_path: Optional[VirtualPath] = None,
    ) -> None:
        # The base class stores `reference_path`; no need to assign it again here.
        super().__init__(
            basename,
            parent,
            reference_path=reference_path,
            default_mode=0o755,
        )
        assert reference_path is None or reference_path.is_dir

    @property
    def is_dir(self) -> bool:
        return True

    @property
    def is_file(self) -> bool:
        return False

    @property
    def is_symlink(self) -> bool:
        return False

    def readlink(self) -> str:
        """Always fails: directories are not symlinks.

        :raises TypeError: Unconditionally.
        """
        raise TypeError(f'"{self._orphan_safe_path()!r}" is a directory; not a symlink')
class FSBackedFilePath(VirtualPathWithReference):
    """A regular file in the virtual file system backed by a real file on disk."""

    __slots__ = ("_fs_path", "_replaceable_inline")

    def __init__(
        self,
        basename: str,
        parent_dir: FSPath,
        fs_path: str,
        *,
        replaceable_inline: bool = False,
        initial_mode: Optional[int] = None,
        mtime: Optional[float] = None,
        stat_cache: Optional[os.stat_result] = None,
        reference_path: Optional[VirtualPath] = None,
    ) -> None:
        """
        :param basename: Name of the file inside `parent_dir`.
        :param parent_dir: Directory the file is attached to.
        :param fs_path: Path of the backing file on the real file system.
        :param replaceable_inline: Whether the backing file may be replaced in place.
        :param initial_mode: Mode to assign; when omitted, the mode chosen by the
          base class (reference path mode or 0o644) is kept.
        :param mtime: Optional explicit mtime.
        :param stat_cache: Optional pre-computed stat result for `fs_path`.
        :param reference_path: Optional reference path forwarded to the base class.
        """
        super().__init__(
            basename,
            parent_dir,
            default_mode=0o644,
            reference_path=reference_path,
        )
        self._fs_path = fs_path
        self._replaceable_inline = replaceable_inline
        # NOTE(review): the mode is assigned through the property setter before the
        # mtime and stat cache are stored; presumably the order matters -- confirm
        # before reordering.
        if initial_mode is not None:
            self.mode = initial_mode
        if mtime is not None:
            self._mtime = mtime
        self._stat_cache = stat_cache
        # Only files below the scratch directory are expected to be inline-replaceable.
        assert (
            not replaceable_inline or "debputy/scratch-dir/" in fs_path
        ), f"{fs_path} should not be inline-replaceable -- {self.path}"

    @property
    def is_dir(self) -> bool:
        return False

    @property
    def is_file(self) -> bool:
        return True

    @property
    def is_symlink(self) -> bool:
        return False

    def readlink(self) -> str:
        # Regular files are never symlinks, so this always fails.
        raise TypeError(f'"{self._orphan_safe_path()!r}" is a file; not a symlink')

    @property
    def has_fs_path(self) -> bool:
        return True

    @property
    def fs_path(self) -> str:
        return self._fs_path

    @property
    def _can_replace_inline(self) -> bool:
        return self._replaceable_inline

    def _replaced_path(self, new_fs_path: str) -> None:
        # Switch to the new backing file; the reference path is dropped and the
        # new file is marked as inline-replaceable.
        self._fs_path = new_fs_path
        self._reference_path = None
        self._replaceable_inline = True
"_has_fs_path", + "_fs_path", + "_link_target", + "_content", + "_materialized_content", + ) + + def __init__( + self, + basename: str, + parent_dir: Optional[FSPath], + mode: Optional[int] = None, + mtime: Optional[float] = None, + is_dir: bool = False, + has_fs_path: Optional[bool] = False, + fs_path: Optional[str] = None, + link_target: Optional[str] = None, + content: Optional[str] = None, + materialized_content: Optional[str] = None, + ) -> None: + if is_dir: + self._path_type = PathType.DIRECTORY + elif link_target is not None: + self._path_type = PathType.SYMLINK + if mode is not None and mode != _SYMLINK_MODE: + raise ValueError( + f'Please do not assign a mode to symlinks. Triggered for "{basename}".' + ) + assert mode is None or mode == _SYMLINK_MODE + else: + self._path_type = PathType.FILE + + if mode is not None: + initial_mode = mode + else: + initial_mode = 0o755 if is_dir else 0o644 + + self._link_target = link_target + if has_fs_path is None: + has_fs_path = bool(fs_path) + self._has_fs_path = has_fs_path + self._fs_path = fs_path + self._materialized_content = materialized_content + super().__init__( + basename, + parent=parent_dir, + initial_mode=initial_mode, + mtime=mtime, + ) + self._content = content + + @property + def is_dir(self) -> bool: + return self._path_type == PathType.DIRECTORY + + @property + def is_file(self) -> bool: + return self._path_type == PathType.FILE + + @property + def is_symlink(self) -> bool: + return self._path_type == PathType.SYMLINK + + def readlink(self) -> str: + if not self.is_symlink: + raise TypeError(f"readlink is only valid for symlinks ({self.path!r})") + link_target = self._link_target + assert link_target is not None + return link_target + + @property + def mtime(self) -> float: + if self._mtime is None: + self._mtime = time.time() + return self._mtime + + @mtime.setter + def mtime(self, new_mtime: float) -> None: + self._rw_check() + self._mtime = new_mtime + + @property + def has_fs_path(self) -> bool: 
@property
def size(self) -> int:
    """Size of the test path in bytes.

    * With in-memory `content`: the length of its UTF-8 encoding.
    * Without any file system backing (nothing on disk and nothing to
      materialize): 0.
    * Otherwise: the `st_size` reported by `stat()`.
    """
    if self._content is not None:
        return len(self._content.encode("utf-8"))
    if not self.has_fs_path:
        return 0
    # The `fs_path` property raises rather than returning None, so look at the
    # backing fields directly to decide whether there is anything to stat.
    if self._fs_path is None and self._materialized_content is None:
        return 0
    return self.stat().st_size
def open(
    self,
    *,
    byte_io: bool = False,
    buffering: int = -1,
) -> Union[TextIO, BinaryIO]:
    """Open the test path for reading.

    Paths created with in-memory `content` are served from memory (UTF-8 encoded
    for `byte_io`). All other paths are delegated to the parent implementation.

    :param byte_io: Return a binary stream instead of a text stream.
    :param buffering: Forwarded to the parent implementation.
    :raises TestPathWithNonExistentFSPathError: If the parent implementation
      fails with `FileNotFoundError`.
    """
    content = self._content
    if content is None:
        try:
            return super().open(byte_io=byte_io, buffering=buffering)
        except FileNotFoundError as e:
            # The first fragment must be an f-string; otherwise the placeholders
            # are emitted literally instead of naming the offending path.
            raise TestPathWithNonExistentFSPathError(
                f"The test path {self.path} had an fs_path {self._fs_path}, which does not"
                " exist. This exception can only occur in the testsuite. Either have the"
                " test provide content for the path (`virtual_path_def(..., content=...) or,"
                " if that is too painful in general, have the code accept this error as a "
                " test only-case and provide a default."
            ) from e

    if byte_io:
        return io.BytesIO(content.encode("utf-8"))
    return io.StringIO(content)
def lookup(self, path: str) -> Optional["FSROOverlay"]:
    """Resolve `path` relative to this directory (or to the root, if absolute).

    "." segments are skipped and ".." moves to the parent directory.

    :param path: The path to look up.
    :return: The matching path, or None if this path is not a directory or a
      segment does not exist.
    :raises ValueError: If a ".." segment would move above the root directory.
    """
    if not self.is_dir:
        return None
    if self._children is None:
        self._ensure_children_are_resolved()

    absolute, _, path_parts = _split_path(path)
    current = cast("FSROOverlay", _root(self)) if absolute else self
    for dir_part in path_parts:
        if dir_part == ".":
            continue
        if dir_part == "..":
            parent = current.parent_dir
            # Check the parent we just fetched, not `current`, which is never
            # None at this point.
            if parent is None:
                raise ValueError(f'The path "{path}" escapes the root dir')
            current = parent
            continue
        try:
            current = current[dir_part]
        except KeyError:
            return None
    return current
def stat(self) -> os.stat_result:
    """Return the (cached) `os.lstat` result for the backing path.

    A failed lookup is remembered, so later calls raise `FileNotFoundError`
    without touching the file system again.

    :raises FileNotFoundError: If the backing path does not exist.
    """
    if self._stat_failed_cache:
        code = errno.ENOENT
        raise FileNotFoundError(code, os.strerror(code), self.fs_path)

    cached = self._stat_cache
    if cached is not None:
        return cached

    try:
        cached = os.lstat(self.fs_path)
    except FileNotFoundError:
        self._stat_failed_cache = True
        raise
    self._stat_cache = cached
    return cached
return self._fs_path + + @property + def is_dir(self) -> bool: + # The root path can have a non-existent fs_path (such as d/tmp not always existing) + try: + return stat.S_ISDIR(self.stat().st_mode) + except FileNotFoundError: + return False + + @property + def is_file(self) -> bool: + # The root path can have a non-existent fs_path (such as d/tmp not always existing) + try: + return stat.S_ISREG(self.stat().st_mode) + except FileNotFoundError: + return False + + @property + def is_symlink(self) -> bool: + # The root path can have a non-existent fs_path (such as d/tmp not always existing) + try: + return stat.S_ISLNK(self.stat().st_mode) + except FileNotFoundError: + return False + + @property + def has_fs_path(self) -> bool: + return True + + def open( + self, + *, + byte_io: bool = False, + buffering: int = -1, + ) -> Union[TextIO, BinaryIO]: + # Allow symlinks for open here, because we can let the OS resolve the symlink reliably in this + # case. + if not self.is_file and not self.is_symlink: + raise TypeError( + f"Cannot open {self.path} for reading: It is not a file nor a symlink" + ) + + if byte_io: + return open(self.fs_path, "rb", buffering=buffering) + return open(self.fs_path, "rt", encoding="utf-8", buffering=buffering) + + def chown( + self, + owner: Optional[StaticFileSystemOwner], + group: Optional[StaticFileSystemGroup], + ) -> None: + self._error_ro_fs() + + def mkdir(self, name: str) -> "VirtualPath": + self._error_ro_fs() + + def add_file( + self, + name: str, + *, + unlink_if_exists: bool = True, + use_fs_path_mode: bool = False, + mode: int = 0o0644, + mtime: Optional[float] = None, + ) -> ContextManager["VirtualPath"]: + self._error_ro_fs() + + def add_symlink(self, link_name: str, link_target: str) -> "VirtualPath": + self._error_ro_fs() + + def unlink(self, *, recursive: bool = False) -> None: + self._error_ro_fs() + + def metadata( + self, + metadata_type: Type[PMT], + *, + owning_plugin: Optional[str] = None, + ) -> 
def build_virtual_fs(
    paths: Iterable[Union[str, PathDef]],
    read_write_fs: bool = False,
) -> "FSPath":
    """Build a virtual file system (for tests) from a list of path definitions.

    Missing parent directories are created on demand. A trailing "/" marks a
    path as a directory. The root may be spelled ".", "./" or "/".

    :param paths: Path names or `PathDef` instances; directories should be
      listed before their children.
    :param read_write_fs: Whether the resulting file system is mutable.
    :return: The root directory of the new file system.
    :raises ValueError: On duplicate definitions, or when a path needs a
      non-directory to act as its parent directory.
    """
    root_aliases = (".", "./", "/")
    root_dir: Optional[FSRootDir] = None
    directories: Dict[str, FSPath] = {}
    non_directories = set()

    def _ensure_parent_dirs(p: str) -> None:
        # Walk upwards until a known directory is found, then create the
        # missing directories top-down.
        current = p.rstrip("/")
        missing_dirs = []
        while True:
            current = os.path.dirname(current)
            if current in directories:
                break
            if current in non_directories:
                raise ValueError(
                    f'Conflicting definition for "{current}". The path "{p}" wants it as a directory,'
                    ' but it is defined as a non-directory. (Ensure dirs end with "/")'
                )
            missing_dirs.append(current)
        for dir_path in reversed(missing_dirs):
            parent_dir = directories[os.path.dirname(dir_path)]
            d = VirtualTestPath(os.path.basename(dir_path), parent_dir, is_dir=True)
            directories[dir_path] = d

    for path_def in as_path_defs(paths):
        path = path_def.path_name
        if path in directories or path in non_directories:
            raise ValueError(
                f'Duplicate definition of "{path}". Can be false positive if input is not in'
                ' "correct order" (ensure directories occur before their children)'
            )
        is_root = path in root_aliases
        if root_dir is None:
            root_fs_path = path_def.fs_path if is_root else None
            root_dir = FSRootDir(fs_path=root_fs_path)
            directories["."] = root_dir

        if is_root:
            # All root aliases (including "/") only define the root itself.
            # Previously "/" fell through and failed with a KeyError below.
            assert "." in directories
            continue

        if not path.startswith("./"):
            path = "./" + path
        _ensure_parent_dirs(path)

        is_dir = False
        if path.endswith("/"):
            path = path[:-1]
            is_dir = True
        directory = directories[os.path.dirname(path)]
        assert not is_dir or not bool(
            path_def.link_target
        ), f"is_dir={is_dir} vs. link_target={path_def.link_target}"
        fs_path = VirtualTestPath(
            os.path.basename(path),
            directory,
            is_dir=is_dir,
            mode=path_def.mode,
            mtime=path_def.mtime,
            has_fs_path=path_def.has_fs_path,
            fs_path=path_def.fs_path,
            link_target=path_def.link_target,
            content=path_def.content,
            materialized_content=path_def.materialized_content,
        )
        assert not fs_path.is_detached
        if fs_path.is_dir:
            directories[fs_path.path] = fs_path
        else:
            non_directories.add(fs_path.path)

    if root_dir is None:
        root_dir = FSRootDir()

    root_dir.is_read_write = read_write_fs
    return root_dir
._deb_options_profiles import DebBuildOptionsAndProfiles +from ._manifest_constants import * +from .architecture_support import DpkgArchitectureBuildProcessValuesTable +from .builtin_manifest_rules import builtin_mode_normalization_rules +from .debhelper_emulation import ( + dhe_dbgsym_root_dir, + assert_no_dbgsym_migration, + read_dbgsym_file, +) +from .exceptions import DebputySubstitutionError +from .filesystem_scan import FSPath, FSRootDir, FSROOverlay +from .installations import ( + InstallRule, + SourcePathMatcher, + PathAlreadyInstalledOrDiscardedError, + NoMatchForInstallPatternError, + InstallRuleContext, + BinaryPackageInstallRuleContext, + InstallSearchDirContext, + SearchDir, +) +from .intermediate_manifest import TarMember, PathType, IntermediateManifest +from .maintscript_snippet import ( + DpkgMaintscriptHelperCommand, + MaintscriptSnippetContainer, +) +from .manifest_conditions import ConditionContext +from .manifest_parser.base_types import FileSystemMatchRule, FileSystemExactMatchRule +from .manifest_parser.util import AttributePath +from .packager_provided_files import PackagerProvidedFile +from .packages import BinaryPackage, SourcePackage +from .plugin.api.impl import BinaryCtrlAccessorProviderCreator +from .plugin.api.impl_types import ( + PackageProcessingContextProvider, + PackageDataTable, +) +from .plugin.api.feature_set import PluginProvidedFeatureSet +from .plugin.api.spec import FlushableSubstvars, VirtualPath +from .substitution import Substitution +from .transformation_rules import ( + TransformationRule, + ModeNormalizationTransformationRule, + NormalizeShebangLineTransformation, +) +from .util import ( + _error, + _warn, + debian_policy_normalize_symlink_target, + generated_content_dir, + _info, +) + +MANIFEST_YAML = YAML() + + +@dataclass(slots=True) +class DbgsymInfo: + dbgsym_fs_root: FSPath + dbgsym_ids: List[str] + + +@dataclass(slots=True, frozen=True) +class BinaryPackageData: + source_package: SourcePackage + binary_package: 
BinaryPackage + binary_staging_root_dir: str + control_output_dir: Optional[str] + fs_root: FSPath + substvars: FlushableSubstvars + package_metadata_context: PackageProcessingContextProvider + ctrl_creator: BinaryCtrlAccessorProviderCreator + dbgsym_info: DbgsymInfo + + +@dataclass(slots=True) +class PackageTransformationDefinition: + binary_package: BinaryPackage + substitution: Substitution + is_auto_generated_package: bool + binary_version: Optional[str] = None + search_dirs: Optional[List[FileSystemExactMatchRule]] = None + dpkg_maintscript_helper_snippets: List[DpkgMaintscriptHelperCommand] = field( + default_factory=list + ) + maintscript_snippets: Dict[str, MaintscriptSnippetContainer] = field( + default_factory=dict + ) + transformations: List[TransformationRule] = field(default_factory=list) + reserved_packager_provided_files: Dict[str, List[PackagerProvidedFile]] = field( + default_factory=dict + ) + install_rules: List[InstallRule] = field(default_factory=list) + + +def _path_to_tar_member( + path: FSPath, + clamp_mtime_to: int, +) -> TarMember: + mtime = float(clamp_mtime_to) + owner, uid, group, gid = path.tar_owner_info + mode = path.mode + + if path.has_fs_path: + mtime = min(mtime, path.mtime) + + if path.is_dir: + path_type = PathType.DIRECTORY + elif path.is_file: + # TODO: someday we will need to deal with hardlinks and it might appear here. + path_type = PathType.FILE + elif path.is_symlink: + # Special-case that we resolve immediately (since we need to normalize the target anyway) + link_target = debian_policy_normalize_symlink_target( + path.path, + path.readlink(), + ) + return TarMember.virtual_path( + path.tar_path, + PathType.SYMLINK, + mtime, + link_target=link_target, + # Force mode to be 0777 as that is the mode we see in the data.tar. In theory, tar lets you set + # it to whatever. However, for reproducibility, we have to be well-behaved - and that is 0777. 
ST = TypeVar("ST")
T = TypeVar("T")


class AbstractYAMLSubStore(Generic[ST]):
    """A view on one entry (`parent_key`) of a parent YAML container.

    The view is "attached" when the entry currently exists in the parent and
    "detached" otherwise. A detached view keeps its own store, which can later be
    inserted into the parent with `create_definition()`.
    """

    def __init__(
        self,
        parent_store: Any,
        parent_key: Optional[Union[int, str]],
        store: Optional[ST] = None,
    ) -> None:
        """
        :param parent_store: The container (mapping or list) holding the entry, if any.
        :param parent_key: Key or index of the entry in `parent_store`, if any.
        :param store: The entry's own store. If the parent already holds an entry
          for `parent_key`, this must be that very object (or be omitted).
        :raises ValueError: If `store` differs from the entry already in the parent.
        """
        from_parent_store = None
        found_in_parent = False
        if parent_store is not None and parent_key is not None:
            try:
                from_parent_store = parent_store[parent_key]
            except (KeyError, IndexError):
                pass
            else:
                found_in_parent = True
            # Compare against the entry found in the parent, not the parent
            # container itself.
            if (
                store is not None
                and from_parent_store is not None
                and store is not from_parent_store
            ):
                raise ValueError(
                    "Store is provided but is not the one already in the parent store"
                )
            if store is None:
                store = from_parent_store
        self._parent_store = parent_store
        self._parent_key = parent_key
        # Attached means the lookup succeeded. A plain `parent_key in parent_store`
        # test would be a value membership test for list parents.
        self._is_detached = not found_in_parent
        assert self._is_detached or store is not None
        if store is None:
            store = self._create_new_instance()
        self._store: ST = store

    def _create_new_instance(self) -> ST:
        """Create an empty store for a new definition (subclass hook)."""
        raise NotImplementedError

    def create_definition_if_missing(self) -> None:
        """Insert the store into the parent unless it is already attached."""
        if self._is_detached:
            self.create_definition()

    def create_definition(self) -> None:
        """Insert the store into the parent container.

        For list parents the store is appended and its index becomes the key.

        :raises RuntimeError: If already attached or if there is no parent.
        """
        if not self._is_detached:
            raise RuntimeError("Definition is already present")
        parent_store = self._parent_store
        if parent_store is None:
            raise RuntimeError(
                f"Definition is not attached to any parent!? ({self.__class__.__name__})"
            )
        if isinstance(parent_store, list):
            assert self._parent_key is None
            self._parent_key = len(parent_store)
            self._parent_store.append(self._store)
        else:
            parent_store[self._parent_key] = self._store
        self._is_detached = False

    def remove_definition(self) -> None:
        """Remove the entry from the parent container and mark the view detached.

        :raises RuntimeError: If the view is not attached.
        """
        self._ensure_attached()
        del self._parent_store[self._parent_key]
        if isinstance(self._parent_store, list):
            # The old index is meaningless once the element is gone.
            self._parent_key = None
        self._is_detached = True

    def _ensure_attached(self) -> None:
        if self._is_detached:
            raise RuntimeError("The definition has been removed!")
class MutableYAMLConffileManagementItem(AbstractYAMLDictSubStore[Any]):
    """Mutable view of a single conffile management entry.

    The backing store is a one-key mapping: the key is the command
    (``MK_CONFFILE_MANAGEMENT_REMOVE`` or ``MK_CONFFILE_MANAGEMENT_RENAME``)
    and the value is the mapping holding that command's attributes.
    """

    @classmethod
    def rm_conffile(
        cls,
        conffile: str,
        prior_to_version: Optional[str],
        owning_package: Optional[str],
    ) -> "MutableYAMLConffileManagementItem":
        """Create a "remove" entry for ``conffile``.

        The item is created without a parent store or key. The optional
        ``prior_to_version`` and ``owning_package`` attributes are only
        written when they are not None.
        """
        r = cls(
            None,
            None,
            store=CommentedMap(
                {
                    MK_CONFFILE_MANAGEMENT_REMOVE: CommentedMap(
                        {MK_CONFFILE_MANAGEMENT_REMOVE_PATH: conffile}
                    )
                }
            ),
        )
        r.prior_to_version = prior_to_version
        r.owning_package = owning_package
        return r

    @classmethod
    def mv_conffile(
        cls,
        old_conffile: str,
        new_conffile: str,
        prior_to_version: Optional[str],
        owning_package: Optional[str],
    ) -> "MutableYAMLConffileManagementItem":
        """Create a "rename" entry moving ``old_conffile`` to ``new_conffile``.

        The item is created without a parent store or key. The optional
        ``prior_to_version`` and ``owning_package`` attributes are only
        written when they are not None.
        """
        r = cls(
            None,
            None,
            store=CommentedMap(
                {
                    MK_CONFFILE_MANAGEMENT_RENAME: CommentedMap(
                        {
                            MK_CONFFILE_MANAGEMENT_RENAME_SOURCE: old_conffile,
                            MK_CONFFILE_MANAGEMENT_RENAME_TARGET: new_conffile,
                        }
                    )
                }
            ),
        )
        r.prior_to_version = prior_to_version
        r.owning_package = owning_package
        return r

    @property
    def _container(self) -> Dict[str, Any]:
        """The attribute mapping stored under the single command key."""
        assert len(self._store) == 1
        return next(iter(self._store.values()))

    @property
    def command(self) -> str:
        """The single key of the backing store, i.e. the command name."""
        assert len(self._store) == 1
        return next(iter(self._store))

    @property
    def obsolete_conffile(self) -> str:
        """The path being removed, or the source path of a rename."""
        if self.command == MK_CONFFILE_MANAGEMENT_REMOVE:
            return self._container[MK_CONFFILE_MANAGEMENT_REMOVE_PATH]
        assert self.command == MK_CONFFILE_MANAGEMENT_RENAME
        return self._container[MK_CONFFILE_MANAGEMENT_RENAME_SOURCE]

    @obsolete_conffile.setter
    def obsolete_conffile(self, value: str) -> None:
        if self.command == MK_CONFFILE_MANAGEMENT_REMOVE:
            self._container[MK_CONFFILE_MANAGEMENT_REMOVE_PATH] = value
        else:
            assert self.command == MK_CONFFILE_MANAGEMENT_RENAME
            self._container[MK_CONFFILE_MANAGEMENT_RENAME_SOURCE] = value

    @property
    def new_conffile(self) -> str:
        """The target path of a rename.

        :raises TypeError: If the entry is not a rename command.
        """
        if self.command != MK_CONFFILE_MANAGEMENT_RENAME:
            raise TypeError(
                f"The new_conffile attribute is only applicable to command {MK_CONFFILE_MANAGEMENT_RENAME}."
                f" This is a {self.command}"
            )
        return self._container[MK_CONFFILE_MANAGEMENT_RENAME_TARGET]

    @new_conffile.setter
    def new_conffile(self, value: str) -> None:
        if self.command != MK_CONFFILE_MANAGEMENT_RENAME:
            raise TypeError(
                f"The new_conffile attribute is only applicable to command {MK_CONFFILE_MANAGEMENT_RENAME}."
                f" This is a {self.command}"
            )
        self._container[MK_CONFFILE_MANAGEMENT_RENAME_TARGET] = value

    @property
    def prior_to_version(self) -> Optional[str]:
        """The optional prior-to-version attribute, or None when unset."""
        return self._container.get(MK_CONFFILE_MANAGEMENT_X_PRIOR_TO_VERSION)

    @prior_to_version.setter
    def prior_to_version(self, value: Optional[str]) -> None:
        if value is None:
            # Clearing an attribute that is already absent is not an error.
            with suppress(KeyError):
                del self._container[MK_CONFFILE_MANAGEMENT_X_PRIOR_TO_VERSION]
        else:
            self._container[MK_CONFFILE_MANAGEMENT_X_PRIOR_TO_VERSION] = value

    @property
    def owning_package(self) -> Optional[str]:
        """The optional owning-package attribute, or None when unset."""
        # Read the same key the setter writes. Use .get() because the setter
        # deletes the key when the value is None.
        return self._container.get(MK_CONFFILE_MANAGEMENT_X_OWNING_PACKAGE)

    @owning_package.setter
    def owning_package(self, value: Optional[str]) -> None:
        if value is None:
            # Clearing an attribute that is already absent is not an error.
            with suppress(KeyError):
                del self._container[MK_CONFFILE_MANAGEMENT_X_OWNING_PACKAGE]
        else:
            self._container[MK_CONFFILE_MANAGEMENT_X_OWNING_PACKAGE] = value
@property
def into(self) -> Optional[List[str]]:
    """The packages this rule installs into, always as a list.

    :return: None when the attribute is unset, a one-element list when the
      stored value is a single string, and the stored value otherwise.
    """
    # Use .get() because the setter deletes the key for None, so indexing
    # would raise KeyError instead of returning None.
    v = self._container.get(MK_INSTALLATIONS_INSTALL_INTO)
    if v is None:
        return None
    if isinstance(v, str):
        return [v]
    return v

@into.setter
def into(self, new_value: Optional[Union[str, List[str]]]) -> None:
    if new_value is None:
        # Clearing an attribute that is already absent is not an error.
        with suppress(KeyError):
            del self._container[MK_INSTALLATIONS_INSTALL_INTO]
        return
    if isinstance(new_value, str):
        self._container[MK_INSTALLATIONS_INSTALL_INTO] = new_value
        return
    # Lists are copied into a CommentedSeq before being stored.
    new_list = CommentedSeq(new_value)
    self._container[MK_INSTALLATIONS_INSTALL_INTO] = new_list
@classmethod
def multi_dest_install(
    cls,
    sources: Union[str, List[str]],
    dest_dirs: Sequence[str],
    into: Optional[Union[str, List[str]]],
    *,
    when: Optional[Union[str, Mapping[str, Any]]] = None,
) -> "MutableYAMLInstallRuleInstall":
    """Build a multi-dest-install rule that has no parent store or key yet.

    A single string source is stored under the singular source key; anything
    else is stored under the plural one. ``when`` is only written when it is
    not None.
    """
    source_key = (
        MK_INSTALLATIONS_INSTALL_SOURCE
        if isinstance(sources, str)
        else MK_INSTALLATIONS_INSTALL_SOURCES
    )
    attributes = CommentedMap(
        {
            source_key: sources,
            "dest-dirs": dest_dirs,
        }
    )
    rule = MutableYAMLInstallRuleInstall(
        None,
        None,
        store=CommentedMap({MK_INSTALLATIONS_MULTI_DEST_INSTALL: attributes}),
    )
    rule.into = into
    if when is not None:
        rule.when = when
    return rule
@classmethod
def install_docs(
    cls,
    sources: Union[str, List[str]],
    into: Optional[Union[str, List[str]]],
    *,
    dest_dir: Optional[str] = None,
    when: Optional[Union[str, Mapping[str, Any]]] = None,
) -> "MutableYAMLInstallRuleInstall":
    """Build an install-docs rule that has no parent store or key yet.

    A single string source is stored under the singular source key; anything
    else is stored under the plural one. ``into`` is applied before
    ``dest_dir``, and ``when`` is only written when it is not None.
    """
    source_key = (
        MK_INSTALLATIONS_INSTALL_SOURCE
        if isinstance(sources, str)
        else MK_INSTALLATIONS_INSTALL_SOURCES
    )
    attributes = CommentedMap({source_key: sources})
    rule = MutableYAMLInstallRuleInstall(
        None,
        None,
        store=CommentedMap({MK_INSTALLATIONS_INSTALL_DOCS: attributes}),
    )
    rule.into = into
    rule.dest_dir = dest_dir
    if when is not None:
        rule.when = when
    return rule
"MutableYAMLInstallRuleDiscard": + return MutableYAMLInstallRuleDiscard( + None, + None, + store=CommentedMap({MK_INSTALLATIONS_DISCARD: sources}), + ) + + +class MutableYAMLInstallRuleInstallExamples(AbstractMutableYAMLInstallRule): + pass + + +class MutableYAMLInstallRuleMan(AbstractMutableYAMLInstallRule): + @property + def language(self) -> Optional[str]: + return self._container[MK_INSTALLATIONS_INSTALL_MAN_LANGUAGE] + + @language.setter + def language(self, new_value: Optional[str]) -> None: + if new_value is not None: + self._container[MK_INSTALLATIONS_INSTALL_MAN_LANGUAGE] = new_value + return + with suppress(KeyError): + del self._container[MK_INSTALLATIONS_INSTALL_MAN_LANGUAGE] + + +class MutableYAMLInstallRuleDiscard(AbstractMutableYAMLInstallRule): + pass + + +class MutableYAMLInstallRuleInstall(AbstractMutableYAMLInstallRule): + @property + def sources(self) -> List[str]: + v = self._container[MK_INSTALLATIONS_INSTALL_SOURCES] + if isinstance(v, str): + return [v] + return v + + @sources.setter + def sources(self, new_value: Union[str, List[str]]) -> None: + if isinstance(new_value, str): + self._container[MK_INSTALLATIONS_INSTALL_SOURCES] = new_value + return + new_list = CommentedSeq(new_value) + self._container[MK_INSTALLATIONS_INSTALL_SOURCES] = new_list + + @property + def dest_dir(self) -> Optional[str]: + return self._container.get(MK_INSTALLATIONS_INSTALL_DEST_DIR) + + @dest_dir.setter + def dest_dir(self, new_value: Optional[str]) -> None: + if new_value is not None and self.dest_as is not None: + raise ValueError( + f'Cannot both have a "{MK_INSTALLATIONS_INSTALL_DEST_DIR}" and' + f' "{MK_INSTALLATIONS_INSTALL_AS}"' + ) + if new_value is not None: + self._container[MK_INSTALLATIONS_INSTALL_DEST_DIR] = new_value + else: + with suppress(KeyError): + del self._container[MK_INSTALLATIONS_INSTALL_DEST_DIR] + + @property + def dest_as(self) -> Optional[str]: + return self._container.get(MK_INSTALLATIONS_INSTALL_AS) + + @dest_as.setter + def 
class MutableYAMLInstallationsDefinition(AbstractYAMLListSubStore[Any]):
    """Mutable view of the list of install rules in the manifest."""

    def append(self, install_rule: AbstractMutableYAMLInstallRule) -> None:
        """Attach a detached install rule at the end of this list.

        :raises RuntimeError: If the rule is already attached, or is
          associated with a container other than this one.
        """
        target_store = self._store
        owner = install_rule._parent_store
        owned_elsewhere = owner is not None and owner is not target_store
        if owned_elsewhere or not install_rule._is_detached:
            raise RuntimeError(
                "Item is already attached or associated with a different container"
            )
        # Only materialise this list in its own parent once there is a valid
        # rule to put into it.
        self.create_definition_if_missing()
        install_rule._parent_store = target_store
        install_rule.create_definition()

    def extend(self, install_rules: Iterable[AbstractMutableYAMLInstallRule]) -> None:
        """Attach each rule in order; same checks as :meth:`append`.

        Rules preceding a rejected rule remain attached.
        """
        for install_rule in install_rules:
            self.append(install_rule)
class MutableYAMLManifest:
    """Mutable wrapper around the top-level manifest mapping."""

    def __init__(self, store: Any) -> None:
        """Wrap ``store``, the top-level mapping of the manifest."""
        self._store = store

    @classmethod
    def empty_manifest(cls) -> "MutableYAMLManifest":
        """Create a manifest containing only the default manifest version."""
        return cls(CommentedMap({MK_MANIFEST_VERSION: DEFAULT_MANIFEST_VERSION}))

    @property
    def manifest_version(self) -> str:
        """The manifest version stored in the manifest."""
        return self._store[MK_MANIFEST_VERSION]

    @manifest_version.setter
    def manifest_version(self, version: str) -> None:
        if version not in SUPPORTED_MANIFEST_VERSIONS:
            raise ValueError("Unsupported version")
        self._store[MK_MANIFEST_VERSION] = version

    def installations(
        self,
        *,
        create_if_absent: bool = True,
    ) -> MutableYAMLInstallationsDefinition:
        """Return the installations definition of the manifest.

        :param create_if_absent: When True, the definition is created in the
          manifest if it is missing.
        """
        d = MutableYAMLInstallationsDefinition(self._store, MK_INSTALLATIONS)
        if create_if_absent:
            d.create_definition_if_missing()
        return d

    def manifest_definitions(
        self,
        *,
        create_if_absent: bool = True,
    ) -> MutableYAMLManifestDefinitions:
        """Return the definitions section of the manifest.

        :param create_if_absent: When True, the section is created in the
          manifest if it is missing.
        """
        d = MutableYAMLManifestDefinitions(self._store, MK_MANIFEST_DEFINITIONS)
        if create_if_absent:
            d.create_definition_if_missing()
        return d

    def package(
        self, name: str, *, create_if_absent: bool = True
    ) -> MutableYAMLPackageDefinition:
        """Return the definition for the package ``name``.

        :param name: Name of the package to look up.
        :param create_if_absent: When True, a missing package definition (and
          a missing packages mapping) is created.
        :raises KeyError: If the package has no definition and
          ``create_if_absent`` is False. The manifest is left untouched in
          that case.
        """
        packages_store = self._store.get(MK_PACKAGES)
        if packages_store is None:
            # A pure lookup must not add an empty packages mapping to the
            # manifest as a side effect.
            if not create_if_absent:
                raise KeyError(name)
            packages_store = CommentedMap()
            self._store[MK_PACKAGES] = packages_store
        if packages_store.get(name) is None:
            if not create_if_absent:
                raise KeyError(name)
            d = MutableYAMLPackageDefinition(packages_store, name)
            d.create_definition()
            return d
        return MutableYAMLPackageDefinition(packages_store, name)

    def write_to(self, fd) -> None:
        """Dump the manifest to ``fd`` via ``MANIFEST_YAML``."""
        MANIFEST_YAML.dump(self._store, fd)
def _detect_missing_installations(
    path_matcher: SourcePathMatcher,
    search_dir: VirtualPath,
) -> None:
    """Report paths in ``search_dir`` that were neither installed nor discarded.

    Does nothing when ``search_dir`` is not an existing directory or when the
    matcher reports no missing paths. Otherwise every missing path is listed
    via ``_warn`` and the problem is then reported via ``_error``.

    :param path_matcher: Matcher that is asked for the missing paths.
    :param search_dir: The search directory to check.
    """
    if not os.path.isdir(search_dir.fs_path):
        return
    missing = list(path_matcher.detect_missing(search_dir))
    if not missing:
        return

    _warn(
        f"The following paths were present in {search_dir.fs_path}, but not installed (nor explicitly discarded)."
    )
    _warn("")
    for entry in missing:
        desc = _describe_missing_path(entry)
        _warn(f" * {desc}")
    _warn("")

    # Snippet the user can paste to discard everything that is left over.
    excl = textwrap.dedent(
        """\
        - discard: "*"
        """
    )

    _error(
        "Please review the list and add either install rules or exclusions to `installations` in"
        " debian/debputy.manifest. If you do not need any of these paths, add the following to the"
        f" end of your `installations`:\n\n{excl}\n"
    )
def _install_everything_from_source_dir_if_present(
    dctrl_bin: BinaryPackage,
    substitution: Substitution,
    path_matcher: SourcePathMatcher,
    install_rule_context: InstallRuleContext,
    source_condition_context: ConditionContext,
    source_dir: VirtualPath,
    *,
    into_dir: Optional[VirtualPath] = None,
) -> None:
    """Install everything matching "*" in ``source_dir`` into ``dctrl_bin``.

    A built-in install rule is run against a copy of ``install_rule_context``
    whose only search dir is ``source_dir``. When ``into_dir`` is given, the
    copy also uses it as the fs root for ``dctrl_bin``. Nothing matching, or
    everything already being installed or discarded, is not an error.
    """
    source_path = source_dir.fs_path
    attribute_path = AttributePath.builtin_path()[f"installing {source_path}"]
    target_packages = frozenset([dctrl_bin])
    match_everything = FileSystemMatchRule.from_path_match(
        "*", attribute_path, substitution
    )
    install_rule = InstallRule.install_dest(
        [match_everything],
        None,
        target_packages,
        f"Built-in; install everything from {source_path} into {dctrl_bin.name}",
        None,
    )
    only_this_dir: Tuple[SearchDir] = (SearchDir(source_dir, target_packages),)
    overrides = {"search_dirs": only_this_dir}
    if into_dir is not None:
        # Copy the per-package contexts so the caller's context stays intact.
        per_package = dict(install_rule_context.binary_package_contexts)
        per_package[dctrl_bin.name] = per_package[dctrl_bin.name].replace(
            fs_root=into_dir
        )
        overrides["binary_package_contexts"] = per_package

    scoped_context = install_rule_context.replace(**overrides)
    # Empty directory or everything excluded by default; ignore the error
    with suppress(
        NoMatchForInstallPatternError,
        PathAlreadyInstalledOrDiscardedError,
    ):
        install_rule.perform_install(
            path_matcher,
            scoped_context,
            source_condition_context,
        )
def source_version(self, include_binnmu_version: bool = True) -> str:
    """Resolve the source version through the manifest's substitution.

    :param include_binnmu_version: When True, ``{{DEB_VERSION}}`` is resolved;
      otherwise ``{{_DEBPUTY_INTERNAL_NON_BINNMU_SOURCE}}`` is resolved.
    :return: Whatever the substitution resolves the chosen variable to.
    :raises AssertionError: If the substitution cannot resolve the variable.
    """
    # TODO: There should be an easier way to determine the source version; really.
    version_var = (
        "{{DEB_VERSION}}"
        if include_binnmu_version
        else "{{_DEBPUTY_INTERNAL_NON_BINNMU_SOURCE}}"
    )
    try:
        resolved = self.substitution.substitute(
            version_var, "internal (resolve version)"
        )
    except DebputySubstitutionError as e:
        raise AssertionError(f"Could not resolve {version_var}") from e
    return resolved
+ return package + + def perform_installations( + self, + *, + install_request_context: Optional[InstallSearchDirContext] = None, + enable_manifest_installation_feature: bool = True, + ) -> PackageDataTable: + package_data_dict = {} + package_data_table = PackageDataTable(package_data_dict) + if install_request_context is None: + + @functools.lru_cache(None) + def _as_path(fs_path: str) -> VirtualPath: + return FSROOverlay.create_root_dir(".", fs_path) + + dtmp_dir = _as_path("debian/tmp") + source_root_dir = _as_path(".") + into = frozenset(self._binary_packages.values()) + default_search_dirs = [dtmp_dir] + per_package_search_dirs = { + t.binary_package: [_as_path(f.match_rule.path) for f in t.search_dirs] + for t in self.package_transformations.values() + if t.search_dirs is not None + } + search_dirs = _determine_search_dir_order( + per_package_search_dirs, + into, + default_search_dirs, + source_root_dir, + ) + check_for_uninstalled_dirs = tuple( + s.search_dir + for s in search_dirs + if s.search_dir.fs_path != source_root_dir.fs_path + ) + _present_installation_dirs(search_dirs, check_for_uninstalled_dirs, into) + else: + dtmp_dir = None + search_dirs = install_request_context.search_dirs + into = frozenset(self._binary_packages.values()) + seen = set() + for search_dir in search_dirs: + seen.update(search_dir.applies_to) + + missing = into - seen + if missing: + names = ", ".join(p.name for p in missing) + raise ValueError( + f"The following package(s) had no search dirs: {names}." 
+ " (Generally, the source root would be applicable to all packages)" + ) + extra_names = seen - into + if extra_names: + names = ", ".join(p.name for p in extra_names) + raise ValueError( + f"The install_request_context referenced the following unknown package(s): {names}" + ) + + check_for_uninstalled_dirs = ( + install_request_context.check_for_uninstalled_dirs + ) + + install_rule_context = InstallRuleContext(search_dirs) + + if ( + enable_manifest_installation_feature + and self._install_rules is None + and dtmp_dir is not None + and os.path.isdir(dtmp_dir.fs_path) + ): + msg = ( + "The build system appears to have provided the output of upstream build system's" + " install in debian/tmp. However, these are no provisions for debputy to install" + " any of that into any of the debian packages listed in debian/control." + " To avoid accidentally creating empty packages, debputy will insist that you " + " explicitly define an empty installation definition if you did not want to " + " install any of those files even though they have been provided." + ' Example: "installations: []"' + ) + _error(msg) + elif ( + not enable_manifest_installation_feature and self._install_rules is not None + ): + _error( + f"The `installations` feature cannot be used in {self.manifest_path} with this integration mode." + f" Please remove or comment out the `installations` keyword." 
+ ) + + for dctrl_bin in self.all_packages: + package = dctrl_bin.name + doc_main_package = self._detect_doc_main_package_for(dctrl_bin) + + install_rule_context[package] = BinaryPackageInstallRuleContext( + dctrl_bin, + FSRootDir(), + doc_main_package, + ) + + if enable_manifest_installation_feature: + discard_rules = list( + self.plugin_provided_feature_set.auto_discard_rules.values() + ) + else: + discard_rules = [ + self.plugin_provided_feature_set.auto_discard_rules["debian-dir"] + ] + path_matcher = SourcePathMatcher(discard_rules) + + source_condition_context = ConditionContext( + binary_package=None, + substitution=self.substitution, + build_env=self._build_env, + dpkg_architecture_variables=self._dpkg_architecture_variables, + dpkg_arch_query_table=self._dpkg_arch_query_table, + ) + + for dctrl_bin in self.active_packages: + package = dctrl_bin.name + if install_request_context: + build_system_staging_dir = install_request_context.debian_pkg_dirs.get( + package + ) + else: + build_system_staging_dir_fs_path = os.path.join("debian", package) + if os.path.isdir(build_system_staging_dir_fs_path): + build_system_staging_dir = FSROOverlay.create_root_dir( + ".", + build_system_staging_dir_fs_path, + ) + else: + build_system_staging_dir = None + + if build_system_staging_dir is not None: + _install_everything_from_source_dir_if_present( + dctrl_bin, + self.substitution, + path_matcher, + install_rule_context, + source_condition_context, + build_system_staging_dir, + ) + + if self._install_rules: + # FIXME: Check that every install rule remains used after transformations have run. + # What we want to check is transformations do not exclude everything from an install + # rule. The hard part here is that renaming (etc.) is fine, so we cannot 1:1 string + # match. 
+ for install_rule in self._install_rules: + install_rule.perform_install( + path_matcher, + install_rule_context, + source_condition_context, + ) + + if enable_manifest_installation_feature: + for search_dir in check_for_uninstalled_dirs: + _detect_missing_installations(path_matcher, search_dir) + + for dctrl_bin in self.all_packages: + package = dctrl_bin.name + binary_install_rule_context = install_rule_context[package] + build_system_pkg_staging_dir = os.path.join("debian", package) + fs_root = binary_install_rule_context.fs_root + + context = self.package_transformations[package] + if dctrl_bin.should_be_acted_on and enable_manifest_installation_feature: + for special_install_rule in context.install_rules: + special_install_rule.perform_install( + path_matcher, + install_rule_context, + source_condition_context, + ) + + if dctrl_bin.should_be_acted_on: + self.apply_fs_transformations(package, fs_root) + substvars_file = f"debian/{package}.substvars" + substvars = FlushableSubstvars.load_from_path( + substvars_file, missing_ok=True + ) + # We do not want to touch the substvars file (non-clean rebuild contamination) + substvars.substvars_path = None + control_output_dir = generated_content_dir( + package=dctrl_bin, subdir_key="DEBIAN" + ) + else: + substvars = FlushableSubstvars() + control_output_dir = None + + udeb_package = self._binary_packages.get(f"{package}-udeb") + if udeb_package and not udeb_package.is_udeb: + udeb_package = None + + package_metadata_context = PackageProcessingContextProvider( + self, + dctrl_bin, + udeb_package, + package_data_table, + # FIXME: source_package + ) + + ctrl_creator = BinaryCtrlAccessorProviderCreator( + package_metadata_context, + substvars, + context.maintscript_snippets, + context.substitution, + ) + + if not enable_manifest_installation_feature: + assert_no_dbgsym_migration(dctrl_bin) + dh_dbgsym_root_fs = FSROOverlay.create_root_dir( + "", dhe_dbgsym_root_dir(dctrl_bin) + ) + dbgsym_root_fs = FSRootDir() + 
def apply_fs_transformations(
    self,
    package: str,
    fs_root: FSPath,
) -> None:
    """Run the file system transformations for ``package`` on ``fs_root``.

    The built-in mode normalization runs first, then the transformations of
    the package in their listed order, and the shebang line normalization
    runs last.

    :raises ValueError: If the contents for ``package`` have already been
      finalized, or if ``package`` has no transformation definition.
    """
    if package in self._used_for:
        raise ValueError(
            f"data.tar contents for {package} has already been finalized!?"
        )
    if package not in self.package_transformations:
        raise ValueError(
            f'The package "{package}" was not relevant for the manifest!?'
        )
    definition = self.package_transformations[package]
    binary_package = definition.binary_package
    ctx = ConditionContext(
        binary_package=binary_package,
        substitution=definition.substitution,
        build_env=self._build_env,
        dpkg_architecture_variables=self._dpkg_architecture_variables,
        dpkg_arch_query_table=self._dpkg_arch_query_table,
    )
    mode_rules = list(
        builtin_mode_normalization_rules(
            self._dpkg_architecture_variables,
            binary_package,
            definition.substitution,
        )
    )
    ModeNormalizationTransformationRule(mode_rules).transform_file_system(
        fs_root, ctx
    )
    for step in definition.transformations:
        step.transform_file_system(fs_root, ctx)
    NormalizeShebangLineTransformation().transform_file_system(fs_root, ctx)
def _determine_search_dir_order(
    requested: Mapping[BinaryPackage, List[VirtualPath]],
    all_pkgs: FrozenSet[BinaryPackage],
    default_search_dirs: List[VirtualPath],
    source_root: VirtualPath,
) -> Sequence[SearchDir]:
    """Merge the per-package search dir lists into one global, ordered sequence.

    Each package contributes an ordered list of directories (its entry in `requested`, or
    `default_search_dirs` when it has none). A directory must come after the directory that
    precedes it in any package's list. The source root is always appended last and applies
    to every package.

    :param requested: Explicit search dirs per package; its keys must be a subset of `all_pkgs`.
    :param all_pkgs: Every binary package the ordering is computed for.
    :param default_search_dirs: Search dirs for packages without an entry in `requested`.
    :param source_root: Directory appended as the final search dir for all packages.
    :return: The search dirs in resolution order. A dir that applies to all packages reuses
      the `all_pkgs` object instead of a copy.
    """
    search_dir_table = {}
    assert requested.keys() <= all_pkgs
    for pkg in all_pkgs:
        previous_search_dir: Optional[SearchDirOrderState] = None
        for path in requested.get(pkg, default_search_dirs):
            search_dir_state = search_dir_table.get(path.fs_path)
            if search_dir_state is None:
                search_dir_state = SearchDirOrderState(path)
                search_dir_table[path.fs_path] = search_dir_state
            search_dir_state.applies_to.add(pkg)
            if previous_search_dir is not None:
                search_dir_state.after.add(previous_search_dir.search_dir.fs_path)
            previous_search_dir = search_dir_state

    search_dirs_in_order = []
    released = set()
    # Keep the unresolved states in an ordered list (table insertion order) and rebuild it on
    # every pass. This avoids mutating a collection while iterating it and keeps the result
    # independent of string hash randomisation.
    pending = list(search_dir_table.values())
    while pending:
        still_blocked = []
        for search_dir_state in pending:
            if search_dir_state.after <= released:
                search_dirs_in_order.append(search_dir_state)
                released.add(search_dir_state.search_dir.fs_path)
            else:
                still_blocked.append(search_dir_state)

        if len(still_blocked) == len(pending):
            # A full pass released nothing, so the remaining dirs depend on each other.
            names = ", ".join(s.search_dir.fs_path for s in still_blocked)
            _error(
                f"There is a circular dependency (somewhere) between the search dirs: {names}."
                " Note that the search directories across all packages have to be ordered (and the"
                " source root should generally be last)"
            )
        pending = still_blocked

    search_dirs_in_order.append(
        SearchDirOrderState(
            source_root,
            all_pkgs,
        )
    )

    return tuple(
        # Avoid duplicating all_pkgs
        SearchDir(
            s.search_dir,
            frozenset(s.applies_to) if s.applies_to != all_pkgs else all_pkgs,
        )
        for s in search_dirs_in_order
    )
# Typo detection relies on the optional Levenshtein module. Without it, the helper only
# tells the user how to enable the feature (and only when a required key was missing).
try:
    from Levenshtein import distance
except ImportError:

    def _detect_possible_typo(
        _d,
        _key,
        _attribute_parent_path: AttributePath,
        required: bool,
    ) -> None:
        """Fallback without Levenshtein: hint at the missing module for required keys."""
        if not required:
            return
        _info(
            "Install python3-levenshtein to have debputy try to detect typos in the manifest."
        )

else:

    def _detect_possible_typo(
        d,
        key,
        _attribute_parent_path: AttributePath,
        _required: bool,
    ) -> None:
        """Warn about every key in `d` that is within edit distance 2 of the expected `key`."""
        expected_length = len(key)
        for candidate in d:
            # Cheap length pre-filter before computing the edit distance.
            if abs(expected_length - len(candidate)) > 2:
                continue
            if distance(key, candidate) > 2:
                continue
            location = _attribute_parent_path.path
            ref = f'at "{location}"' if location else "at the manifest root level"
            _warn(
                f'Possible typo: The key "{candidate}" should probably have been "{key}" {ref}'
            )
def _ensure_package_states_is_initialized(self) -> None:
    """Create the per-package state table on first use; later calls do nothing.

    Every package in ``self._binary_packages`` gets a definition under its own name. Each
    package also gets an auto-generated ``<name>-dbgsym`` definition, unless that name is
    already present in the table.
    """
    if self._all_package_states is not None:
        return

    base_substitution = self._substitution
    packages = self._binary_packages

    def _new_state(package, substitution_name, auto_generated):
        # `substitution_name` of None makes PACKAGE resolve to the package's own name.
        return PackageTransformationDefinition(
            binary_package=package,
            substitution=base_substitution.with_extra_substitutions(
                **_per_package_subst_variables(package, name=substitution_name)
            ),
            is_auto_generated_package=auto_generated,
            maintscript_snippets=collections.defaultdict(
                MaintscriptSnippetContainer
            ),
        )

    states = {
        pkg_name: _new_state(package, None, False)
        for pkg_name, package in packages.items()
    }
    self._all_package_states = states

    for pkg_name, package in packages.items():
        dbgsym_name = f"{pkg_name}-dbgsym"
        if dbgsym_name not in states:
            states[dbgsym_name] = _new_state(package, dbgsym_name, True)
+ return self._all_package_states + + @property + def dpkg_architecture_variables(self) -> DpkgArchitectureBuildProcessValuesTable: + return self._dpkg_architecture_variables + + @property + def dpkg_arch_query_table(self) -> DpkgArchTable: + return self._dpkg_arch_query_table + + @property + def build_env(self) -> DebBuildOptionsAndProfiles: + return self._build_env + + def build_manifest(self) -> HighLevelManifest: + if self._used: + raise TypeError("build_manifest can only be called once!") + self._used = True + self._ensure_package_states_is_initialized() + for var, attribute_path in self._declared_variables.items(): + if not self.substitution.is_used(var): + raise ManifestParseException( + f'The variable "{var}" is unused. Either use it or remove it.' + f" The variable was declared at {attribute_path.path}." + ) + if isinstance(self, YAMLManifestParser) and self._mutable_yaml_manifest is None: + self._mutable_yaml_manifest = MutableYAMLManifest.empty_manifest() + all_packager_provided_files = detect_all_packager_provided_files( + self._plugin_provided_feature_set.packager_provided_files, + self._debian_dir, + self.binary_packages, + ) + + for package in self._package_states: + with self.binary_package_context(package) as context: + if not context.is_auto_generated_package: + ppf_result = all_packager_provided_files[package] + if ppf_result.auto_installable: + context.install_rules.append( + PPFInstallRule( + context.binary_package, + context.substitution, + ppf_result.auto_installable, + ) + ) + context.reserved_packager_provided_files.update( + ppf_result.reserved_only + ) + self._transform_dpkg_maintscript_helpers_to_snippets() + + return HighLevelManifest( + self.manifest_path, + self._mutable_yaml_manifest, + self._install_rules, + self._source_package, + self.binary_packages, + self.substitution, + self._package_states, + self._dpkg_architecture_variables, + self._dpkg_arch_query_table, + self._build_env, + self._plugin_provided_feature_set, + 
@contextlib.contextmanager
def binary_package_context(
    self, package_name: str
) -> Iterator["PackageTransformationDefinition"]:
    """Enter the parsing context of one binary package for the duration of a ``with`` block.

    While the block runs, the package's state is the top of ``self._package_state_stack``.

    :param package_name: Name of the package to enter; it must be a key of
      ``self._package_states``, otherwise the problem is reported via ``self._error``.
    :return: A context manager yielding the package's state object.
    :raises RuntimeError: If the block finishes normally but left the stack at a different
      depth than it found it.
    """
    if package_name not in self._package_states:
        self._error(
            f'The package "{package_name}" is not present in the debian/control file (could not find'
            f' "Package: {package_name}" in a binary stanza) nor is it a -dbgsym package for one'
            " for a package in debian/control."
        )
    package_state = self._package_states[package_name]
    self._package_state_stack.append(package_state)
    ps_len = len(self._package_state_stack)
    try:
        yield package_state
    except BaseException:
        # The block failed: drop our entry (and anything pushed above it) so the parser
        # does not stay stuck in this package's context, then let the error propagate.
        del self._package_state_stack[ps_len - 1 :]
        raise
    if ps_len != len(self._package_state_stack):
        raise RuntimeError("Internal error: Unbalanced stack manipulation detected")
    self._package_state_stack.pop()
def _transform_dpkg_maintscript_helpers_to_snippets(self) -> None:
    """Turn the current package's dpkg-maintscript-helper requests into maintscript snippets.

    Each recorded helper command becomes one ``dpkg-maintscript-helper ... -- "$@"`` snippet,
    and that same snippet is appended to every script named in ``STD_CONTROL_SCRIPTS``.
    Requires an active binary package context.
    """
    state = self.current_binary_package_state
    snippets_by_script = state.maintscript_snippets
    for helper in state.dpkg_maintscript_helper_snippets:
        source = helper.definition_source
        shell_command = f'dpkg-maintscript-helper {escape_shell(*helper.cmdline)} -- "$@"\n'
        snippet = MaintscriptSnippet(
            definition_source=source,
            snippet=shell_command,
        )
        for script_name in STD_CONTROL_SCRIPTS:
            snippets_by_script[script_name].append(snippet)
def parse_path_or_glob(
    self,
    path_or_glob: str,
    definition_source: AttributePath,
) -> MatchRule:
    """Parse a manifest path or glob into a match rule, rejecting match-everything rules.

    :param path_or_glob: The raw path or glob as written in the manifest.
    :param definition_source: Where in the manifest the value was defined; used for
      substitution and in the error message.
    :return: The parsed match rule.
    """
    match_rule = MatchRule.from_path_or_glob(
        path_or_glob, definition_source.path, substitution=self.substitution
    )
    # NB: "." and "/" will be translated to MATCH_ANYTHING by MatchRule.from_path_or_glob,
    # so there is no need to check for an exact match on "." like in normalize_path.
    if match_rule.rule_type == MatchRuleType.MATCH_ANYTHING:
        # The quote after {path_or_glob} used to be missing, leaving the message unbalanced.
        self._error(
            f'The chosen match rule "{path_or_glob}" matches everything (including the deb root directory).'
            f' Please correct "{definition_source.path}" (path: "{path_or_glob}") in {self.manifest_path} to'
            f' something that matches "less" than everything.'
        )
    return match_rule
"{self.manifest_path}.' + f"{extra_info}" + ) + + if expected_type is not None: + return self._ensure_value_is_type( + v, expected_type, key, attribute_parent_path + ) + return v + + def _ensure_value_is_type( + self, + v, + t, + key: Union[str, int, AttributePath], + attribute_parent_path: Optional[AttributePath], + ): + if v is None: + return None + if not isinstance(v, t): + if isinstance(t, tuple): + t_msg = "one of: " + ", ".join(x.__name__ for x in t) + else: + t_msg = f"a {t.__name__}" + key_path = ( + key.path + if isinstance(key, AttributePath) + else assume_not_none(attribute_parent_path)[key].path + ) + self._error( + f'The key {key_path} must be {t_msg} in manifest "{self.manifest_path}"' + ) + return v + + def from_yaml_dict(self, yaml_data: object) -> "HighLevelManifest": + attribute_path = AttributePath.root_path() + manifest_root_parser = ( + self._plugin_provided_feature_set.dispatchable_object_parsers[ + OPARSER_MANIFEST_ROOT + ] + ) + parsed_data = cast( + "ManifestRootRule", + manifest_root_parser.parse( + yaml_data, + attribute_path, + parser_context=self, + ), + ) + + packages_dict = parsed_data.get("packages", {}) + install_rules = parsed_data.get("installations") + if install_rules: + self._install_rules = install_rules + packages_parent_path = attribute_path["packages"] + for package_name_raw, v in packages_dict.items(): + definition_source = packages_parent_path[package_name_raw] + package_name = package_name_raw + if "{{" in package_name: + package_name = self.substitution.substitute( + package_name_raw, + definition_source.path, + ) + + with self.binary_package_context(package_name) as package_state: + if package_state.is_auto_generated_package: + # Maybe lift (part) of this restriction. + self._error( + f'Cannot define rules for package "{package_name}" (at {definition_source.path}). It is an' + " auto-generated package." 
+ ) + package_rule_parser = ( + self._plugin_provided_feature_set.dispatchable_object_parsers[ + OPARSER_PACKAGES + ] + ) + parsed = cast( + "BinaryPackageRule", + package_rule_parser.parse( + v, definition_source, parser_context=self + ), + ) + binary_version = parsed.get("binary-version") + if binary_version is not None: + package_state.binary_version = ( + package_state.substitution.substitute( + binary_version, + definition_source["binary-version"].path, + ) + ) + search_dirs = parsed.get("installation_search_dirs") + if search_dirs is not None: + package_state.search_dirs = search_dirs + transformations = parsed.get("transformations") + conffile_management = parsed.get("conffile_management") + if transformations: + package_state.transformations.extend(transformations) + if conffile_management: + package_state.dpkg_maintscript_helper_snippets.extend( + conffile_management + ) + return self.build_manifest() + + def _parse_manifest(self, fd: Union[IO[bytes], str]) -> HighLevelManifest: + try: + data = MANIFEST_YAML.load(fd) + except YAMLError as e: + msg = str(e) + lines = msg.splitlines(keepends=True) + i = -1 + for i, line in enumerate(lines): + # Avoid an irrelevant "how do configure the YAML parser" message, which the + # user cannot use. + if line.startswith("To suppress this check"): + break + if i > -1 and len(lines) > i + 1: + lines = lines[:i] + msg = "".join(lines) + msg = msg.rstrip() + msg += ( + f"\n\nYou can use `yamllint -d relaxed {escape_shell(self.manifest_path)}` to validate" + " the YAML syntax. The yamllint tool also supports style rules for YAML documents" + " (such as indentation rules) in case that is of interest." 
def parse_manifest(
    self,
    *,
    fd: Optional[Union[IO[bytes], str]] = None,
) -> "HighLevelManifest":
    """Parse the YAML manifest and return the resulting high-level manifest.

    :param fd: Optional manifest content (binary stream or string) to parse. When omitted,
      the file at ``self.manifest_path`` is opened in binary mode and parsed instead.
    :return: Whatever ``self._parse_manifest`` produces for the chosen input.
    """
    if fd is not None:
        return self._parse_manifest(fd)
    with open(self.manifest_path, "rb") as manifest_fd:
        return self._parse_manifest(manifest_fd)
class PathAlreadyInstalledOrDiscardedError(InstallRuleError):
    """Raised when an exact-match request hits a path that was already installed or excluded.

    Instances are created with four positional arguments: the human-readable message, the
    offending match, the packages it was already installed into, and the definition source.
    The accessors therefore read ``args[1]`` to ``args[3]`` (``args[0]`` is the message),
    which is the same layout the sibling install-rule errors use.
    """

    @property
    def path(self) -> str:
        """The path argument the error was raised with."""
        # NOTE(review): the raise site in find_and_reserve_all_matches passes the match
        # object here rather than a plain string - confirm the intended type.
        return cast("str", self.args[1])

    @property
    def into(self) -> FrozenSet[BinaryPackage]:
        """The packages the path had already been installed into."""
        return cast("FrozenSet[BinaryPackage]", self.args[2])

    @property
    def definition_source(self) -> str:
        """The manifest location of the rule that triggered the error."""
        return cast("str", self.args[3])
def _determine_manpage_section(
    match_rule: PathMatch,
    provided_section: Optional[int],
    definition_source: str,
) -> Optional[str]:
    """Work out the section string for a manpage.

    An explicitly provided section wins. Otherwise the file is scanned for its first
    recognisable ``.TH``/``.Dt`` header. If that yields nothing usable, the section is taken
    from the file name suffix (``.N`` or ``.N.gz``).

    :param match_rule: The matched manpage; its ``path`` is read from disk.
    :param provided_section: Section declared in the manifest, if any.
    :param definition_source: Manifest location, used only in the warning text.
    :return: The section as a string, or ``None`` when it could not be determined.
    """
    if provided_section is not None:
        return str(provided_section)

    detected_section = None
    # Decode leniently: only ASCII header lines matter here, and the file may be in a
    # legacy encoding or compressed. Strict decoding would abort before the basename
    # fallback below gets a chance to run.
    with open(
        match_rule.path.fs_path, "r", encoding="utf-8", errors="replace"
    ) as fd:
        for line in fd:
            if not line.startswith((".TH", ".Dt")):
                continue

            m = _MAN_DT_LINE.match(line)
            if not m:
                m = _MAN_TH_LINE.match(line)
                if not m:
                    continue
            detected_section = m.group(1)
            if "." in detected_section:
                _warn(
                    f"Ignoring detected section {detected_section} in {match_rule.path.fs_path}"
                    f" (detected via {definition_source}): It looks too much like a version"
                )
                detected_section = None
            # Only the first recognised header line is considered.
            break

    if detected_section is None:
        m = _MAN_SECTION_BASENAME.search(os.path.basename(match_rule.path.path))
        if m:
            detected_section = m.group(1)

    return detected_section
def _determine_manpage_language(
    match_rule: PathMatch,
    provided_language: Optional[str],
) -> Optional[str]:
    """Pick the language directory component for a manpage.

    :param match_rule: The matched manpage; its name or path is inspected when deriving.
    :param provided_language: ``"derive-from-basename"`` or ``"derive-from-path"`` to guess the
      language from the file name or its path respectively. ``None`` behaves like
      ``"derive-from-path"``. Any other value is used verbatim, except that ``"C"`` means
      "no language".
    :return: The language code, or ``None`` when there is none or nothing matched.
    """
    if provided_language == "derive-from-basename":
        found = MAN_GUESS_FROM_BASENAME.search(match_rule.path.name)
    elif provided_language is None or provided_language == "derive-from-path":
        found = MAN_GUESS_LANG_FROM_PATH.search(match_rule.path.path)
    else:
        # An explicit language; "C" is the untranslated default and has no directory.
        return None if provided_language == "C" else provided_language

    return found.group(1) if found is not None else None
is_reserved(self, path: "VirtualPath") -> bool: + fs_path = path.fs_path + if fs_path in self._already_matched: + return True + result = self._discarded.get(fs_path, DiscardState.UNCHECKED) + if result == DiscardState.UNCHECKED: + result = self._check_plugin_provided_exclude_state_for(path) + if result == DiscardState.NOT_DISCARDED: + return False + + return True + + def exclude(self, path: str) -> None: + self._discarded[path] = DiscardState.DISCARDED_BY_MANIFEST_RULE + + def _run_plugin_provided_discard_rules_on(self, path: "VirtualPath") -> bool: + for dr in self._auto_discard_rules: + verdict = dr.should_discard(path) + if verdict: + self.used_auto_discard_rules[dr.name].add(path.fs_path) + return True + return False + + def _check_plugin_provided_exclude_state_for( + self, + path: "VirtualPath", + ) -> DiscardState: + cache_misses = [] + current_path = path + while True: + fs_path = current_path.fs_path + exclude_state = self._discarded.get(fs_path, DiscardState.UNCHECKED) + if exclude_state != DiscardState.UNCHECKED: + verdict = exclude_state + break + cache_misses.append(fs_path) + if self._run_plugin_provided_discard_rules_on(current_path): + verdict = DiscardState.DISCARDED_BY_PLUGIN_PROVIDED_RULE + break + # We cannot trust a "NOT_DISCARDED" until we check its parent (the directory could + # be excluded without the files in it triggering the rule). 
def detect_missing(self, search_dir: "VirtualPath") -> Iterator["VirtualPath"]:
    """Yield the paths below `search_dir` that nothing has reserved.

    Non-directories and empty directories are reported when ``self.is_reserved`` rejects
    them. A directory with children is never reported itself; only its contents are walked.

    :param search_dir: Root of the tree to walk (the root itself is not reported).
    :return: An iterator over the unreserved paths, in depth-first stack order.
    """
    pending = list(search_dir.iterdir)
    while pending:
        entry = pending.pop()
        if entry.is_dir:
            children = list(entry.iterdir)
            if children:
                pending.extend(children)
                continue
            # No children: an "explicitly" empty dir, treated like a leaf below.
        if not self.is_reserved(entry):
            yield entry
bool, + match_filter: Optional[Callable[["VirtualPath"], bool]], + reserved_by: FrozenSet[BinaryPackage], + definition_source: str, + ) -> Tuple[List[PathMatch], Tuple[int, ...]]: + matched = [] + already_installed_paths = 0 + already_excluded_paths = 0 + glob_expand = False if isinstance(match_rule, ExactFileSystemPath) else True + + for match in _resolve_path( + match_rule, + search_dirs, + dir_only_match, + match_filter, + reserved_by, + ): + installed_into, excluded = self.may_match( + match, is_exact_match=not glob_expand + ) + if installed_into: + if glob_expand: + already_installed_paths += 1 + continue + packages = ", ".join(p.name for p in installed_into) + raise PathAlreadyInstalledOrDiscardedError( + f'The "{match.path.fs_path}" has been reserved by and installed into {packages}.' + f" The definition that triggered this issue is {definition_source}.", + match, + installed_into, + definition_source, + ) + if excluded: + if glob_expand: + already_excluded_paths += 1 + continue + raise PathAlreadyInstalledOrDiscardedError( + f'The "{match.path.fs_path}" has been excluded. If you want this path installed, move it' + f" above the exclusion rule that excluded it. The definition that triggered this" + f" issue is {definition_source}.", + match, + installed_into, + definition_source, + ) + if not glob_expand: + for pkg in match.into: + m_key = (pkg.name, match.path.fs_path) + if m_key in self._exact_match_request: + raise ExactPathMatchTwiceError( + f'The path "{match.path.fs_path}" (via exact match) has already been installed' + f" into {pkg.name}. 
The second installation triggered by {definition_source}", + match.path, + pkg, + definition_source, + ) + self._exact_match_request.add(m_key) + + if reserved_by: + self._already_matched[match.path.fs_path] = ( + match.into, + definition_source, + ) + else: + self.exclude(match.path.fs_path) + matched.append(match) + exclude_counts = already_installed_paths, already_excluded_paths + return matched, exclude_counts + + +def _resolve_path( + match_rule: MatchRule, + search_dirs: Iterable["SearchDir"], + dir_only_match: bool, + match_filter: Optional[Callable[["VirtualPath"], bool]], + into: FrozenSet[BinaryPackage], +) -> Iterator[PathMatch]: + missing_matches = set(into) + for sdir in search_dirs: + matched = False + if into and missing_matches.isdisjoint(sdir.applies_to): + # All the packages, where this search dir applies, already got a match + continue + applicable = sdir.applies_to & missing_matches + for matched_path in match_rule.finditer( + sdir.search_dir, + ignore_paths=match_filter, + ): + if dir_only_match and not matched_path.is_dir: + continue + if matched_path.parent_dir is None: + if match_rule is MATCH_ANYTHING: + continue + _error( + f"The pattern {match_rule.describe_match_short()} matched the root dir." + ) + yield PathMatch(matched_path, sdir.search_dir, False, applicable) + matched = True + # continue; we want to match everything we can from this search directory. + + if matched: + missing_matches -= applicable + if into and not missing_matches: + # For install rules, we can stop as soon as all packages had a match + # For discard rules, all search directories must be visited. 
Otherwise, + # you would have to repeat the discard rule once per search dir to be + # sure something is fully discarded + break + + +def _resolve_dest_paths( + match: PathMatch, + dest_paths: Sequence[Tuple[str, bool]], + install_context: "InstallRuleContext", +) -> Sequence[Tuple[str, "FSPath"]]: + dest_and_roots = [] + for dest_path, dest_path_is_format in dest_paths: + if dest_path_is_format: + for pkg in match.into: + parent_dir = match.path.parent_dir + pkg_install_context = install_context[pkg.name] + fs_root = pkg_install_context.fs_root + dpath = dest_path.format( + basename=match.path.name, + dirname=parent_dir.path if parent_dir is not None else "", + package_name=pkg.name, + doc_main_package_name=pkg_install_context.doc_main_package.name, + ) + if dpath.endswith("/"): + raise ValueError( + f'Provided destination (when resolved for {pkg.name}) for "{match.path.path}" ended' + f' with "/" ("{dest_path}"), which it must not!' + ) + dest_and_roots.append((dpath, fs_root)) + else: + if dest_path.endswith("/"): + raise ValueError( + f'Provided destination for "{match.path.path}" ended with "/" ("{dest_path}"),' + " which it must not!" + ) + dest_and_roots.extend( + (dest_path, install_context[pkg.name].fs_root) for pkg in match.into + ) + return dest_and_roots + + +def _resolve_matches( + matches: List[PathMatch], + dest_paths: Union[Sequence[Tuple[str, bool]], Callable[[PathMatch], str]], + install_context: "InstallRuleContext", +) -> Iterator[Tuple[PathMatch, Sequence[Tuple[str, "FSPath"]]]]: + if callable(dest_paths): + compute_dest_path = dest_paths + for match in matches: + dpath = compute_dest_path(match) + if dpath.endswith("/"): + raise ValueError( + f'Provided destination for "{match.path.path}" ended with "/" ("{dpath}"), which it must not!' 
+ ) + dest_and_roots = [ + (dpath, install_context[pkg.name].fs_root) for pkg in match.into + ] + yield match, dest_and_roots + else: + for match in matches: + dest_and_roots = _resolve_dest_paths( + match, + dest_paths, + install_context, + ) + yield match, dest_and_roots + + +class InstallRule(DebputyDispatchableType): + __slots__ = ( + "_already_matched", + "_exact_match_request", + "_condition", + "_match_filter", + "_definition_source", + ) + + def __init__( + self, + condition: Optional[ManifestCondition], + definition_source: str, + *, + match_filter: Optional[Callable[["VirtualPath"], bool]] = None, + ) -> None: + self._condition = condition + self._definition_source = definition_source + self._match_filter = match_filter + + def _check_single_match( + self, source: FileSystemMatchRule, matches: List[PathMatch] + ) -> None: + seen_pkgs = set() + problem_pkgs = frozenset() + for m in matches: + problem_pkgs = seen_pkgs & m.into + if problem_pkgs: + break + seen_pkgs.update(problem_pkgs) + if problem_pkgs: + pkg_names = ", ".join(sorted(p.name for p in problem_pkgs)) + _error( + f'The pattern "{source.raw_match_rule}" matched multiple entries for the packages: {pkg_names}.' + "However, it should matched exactly one item. 
Please tighten the pattern defined" + f" in {self._definition_source}" + ) + + def _match_pattern( + self, + path_matcher: SourcePathMatcher, + fs_match_rule: FileSystemMatchRule, + condition_context: ConditionContext, + search_dirs: Sequence[SearchDir], + into: FrozenSet[BinaryPackage], + ) -> List[PathMatch]: + (matched, exclude_counts) = path_matcher.find_and_reserve_all_matches( + fs_match_rule.match_rule, + search_dirs, + fs_match_rule.raw_match_rule.endswith("/"), + self._match_filter, + into, + self._definition_source, + ) + + already_installed_paths, already_excluded_paths = exclude_counts + + if into: + allow_empty_match = all(not p.should_be_acted_on for p in into) + else: + # discard rules must match provided at least one search dir exist. If none of them + # exist, then we assume the discard rule is for a package that will not be built + allow_empty_match = any(s.search_dir.is_dir for s in search_dirs) + if self._condition is not None and not self._condition.evaluate( + condition_context + ): + allow_empty_match = True + + if not matched and not allow_empty_match: + search_dir_text = ", ".join(x.search_dir.fs_path for x in search_dirs) + if already_excluded_paths and already_installed_paths: + total_paths = already_excluded_paths + already_installed_paths + msg = ( + f"There were no matches for {fs_match_rule.raw_match_rule} in {search_dir_text} after ignoring" + f" {total_paths} path(s) already been matched previously either by install or" + f" exclude rules. If you wanted to install some of these paths into multiple" + f" packages, please tweak the definition that installed them to install them" + f' into multiple packages (usually change "into: foo" to "into: [foo, bar]".' + f" If you wanted to install these paths and exclude rules are getting in your" + f" way, then please move this install rule before the exclusion rule that causes" + f" issue or, in case of built-in excludes, list the paths explicitly (without" + f" using patterns). 
Source for this issue is {self._definition_source}. Match rule:" + f" {fs_match_rule.match_rule.describe_match_exact()}" + ) + elif already_excluded_paths: + msg = ( + f"There were no matches for {fs_match_rule.raw_match_rule} in {search_dir_text} after ignoring" + f" {already_excluded_paths} path(s) that have been excluded." + " If you wanted to install some of these paths, please move the install rule" + " before the exclusion rule or, in case of built-in excludes, list the paths explicitly" + f" (without using patterns). Source for this issue is {self._definition_source}. Match rule:" + f" {fs_match_rule.match_rule.describe_match_exact()}" + ) + elif already_installed_paths: + msg = ( + f"There were no matches for {fs_match_rule.raw_match_rule} in {search_dir_text} after ignoring" + f" {already_installed_paths} path(s) already been matched previously." + " If you wanted to install some of these paths into multiple packages," + f" please tweak the definition that installed them to install them into" + f' multiple packages (usually change "into: foo" to "into: [foo, bar]".' + f" Source for this issue is {self._definition_source}. Match rule:" + f" {fs_match_rule.match_rule.describe_match_exact()}" + ) + else: + # TODO: Try harder to find the match and point out possible typos + msg = ( + f"There were no matches for {fs_match_rule.raw_match_rule} in {search_dir_text} (definition:" + f" {self._definition_source}). 
Match rule: {fs_match_rule.match_rule.describe_match_exact()}" + ) + raise NoMatchForInstallPatternError( + msg, + fs_match_rule, + search_dirs, + self._definition_source, + ) + return matched + + def _install_matches( + self, + path_matcher: SourcePathMatcher, + matches: List[PathMatch], + dest_paths: Union[Sequence[Tuple[str, bool]], Callable[[PathMatch], str]], + install_context: "InstallRuleContext", + into: FrozenSet[BinaryPackage], + condition_context: ConditionContext, + ) -> None: + if ( + self._condition is not None + and not self._condition.evaluate(condition_context) + ) or not any(p.should_be_acted_on for p in into): + # Rule is disabled; skip all its actions - also allow empty matches + # for this particular case. + return + + if not matches: + raise ValueError("matches must not be empty") + + for match, dest_paths_and_roots in _resolve_matches( + matches, + dest_paths, + install_context, + ): + install_recursively_into_dirs = [] + for dest, fs_root in dest_paths_and_roots: + dir_part, basename = os.path.split(dest) + # We do not associate these with the FS path. First off, + # it is complicated to do in most cases (indeed, debhelper + # does not preserve these directories either) and secondly, + # it is "only" mtime and mode - mostly irrelevant as the + # directory is 99.9% likely to be 0755 (we are talking + # directories like "/usr", "/usr/share"). + dir_path = fs_root.mkdirs(dir_part) + existing_path = dir_path.get(basename) + + if match.path.is_dir: + if existing_path is not None and not existing_path.is_dir: + existing_path.unlink() + existing_path = None + current_dir = existing_path + + if current_dir is None: + current_dir = dir_path.mkdir( + basename, reference_path=match.path + ) + install_recursively_into_dirs.append(current_dir) + else: + if existing_path is not None and existing_path.is_dir: + _error( + f"Cannot install {match.path} ({match.path.fs_path}) as {dest}. That path already exist" + f" and is a directory. 
This error was triggered via {self._definition_source}." + ) + + if match.path.is_symlink: + dir_path.add_symlink( + basename, match.path.readlink(), reference_path=match.path + ) + else: + dir_path.insert_file_from_fs_path( + basename, + match.path.fs_path, + follow_symlinks=False, + use_fs_path_mode=True, + reference_path=match.path, + ) + if install_recursively_into_dirs: + self._install_dir_recursively( + path_matcher, install_recursively_into_dirs, match, into + ) + + def _install_dir_recursively( + self, + path_matcher: SourcePathMatcher, + parent_dirs: Sequence[FSPath], + match: PathMatch, + into: FrozenSet[BinaryPackage], + ) -> None: + stack = [ + (parent_dirs, e) + for e in match.path.iterdir + if not path_matcher.is_reserved(e) + ] + + while stack: + current_dirs, dir_entry = stack.pop() + path_matcher.reserve( + dir_entry, + into, + self._definition_source, + is_exact_match=False, + ) + if dir_entry.is_dir: + new_dirs = [ + d.mkdir(dir_entry.name, reference_path=dir_entry) + for d in current_dirs + ] + stack.extend( + (new_dirs, de) + for de in dir_entry.iterdir + if not path_matcher.is_reserved(de) + ) + elif dir_entry.is_symlink: + for current_dir in current_dirs: + current_dir.add_symlink( + dir_entry.name, + dir_entry.readlink(), + reference_path=dir_entry, + ) + elif dir_entry.is_file: + for current_dir in current_dirs: + current_dir.insert_file_from_fs_path( + dir_entry.name, + dir_entry.fs_path, + use_fs_path_mode=True, + follow_symlinks=False, + reference_path=dir_entry, + ) + else: + _error( + f"Unsupported file type: {dir_entry.fs_path} - neither a file, directory or symlink" + ) + + def perform_install( + self, + path_matcher: SourcePathMatcher, + install_context: InstallRuleContext, + condition_context: ConditionContext, + ) -> None: + raise NotImplementedError + + @classmethod + def install_as( + cls, + source: FileSystemMatchRule, + dest_path: str, + into: FrozenSet[BinaryPackage], + definition_source: str, + condition: 
Optional[ManifestCondition], + ) -> "InstallRule": + return GenericInstallationRule( + [source], + [(dest_path, False)], + into, + condition, + definition_source, + require_single_match=True, + ) + + @classmethod + def install_dest( + cls, + sources: Sequence[FileSystemMatchRule], + dest_dir: Optional[str], + into: FrozenSet[BinaryPackage], + definition_source: str, + condition: Optional[ManifestCondition], + ) -> "InstallRule": + if dest_dir is None: + dest_dir = "{dirname}/{basename}" + else: + dest_dir = os.path.join(dest_dir, "{basename}") + return GenericInstallationRule( + sources, + [(dest_dir, True)], + into, + condition, + definition_source, + ) + + @classmethod + def install_multi_as( + cls, + source: FileSystemMatchRule, + dest_paths: Sequence[str], + into: FrozenSet[BinaryPackage], + definition_source: str, + condition: Optional[ManifestCondition], + ) -> "InstallRule": + if len(dest_paths) < 2: + raise ValueError( + "Please use `install_as` when there is less than 2 dest path" + ) + dps = tuple((dp, False) for dp in dest_paths) + return GenericInstallationRule( + [source], + dps, + into, + condition, + definition_source, + require_single_match=True, + ) + + @classmethod + def install_multi_dest( + cls, + sources: Sequence[FileSystemMatchRule], + dest_dirs: Sequence[str], + into: FrozenSet[BinaryPackage], + definition_source: str, + condition: Optional[ManifestCondition], + ) -> "InstallRule": + if len(dest_dirs) < 2: + raise ValueError( + "Please use `install_dest` when there is less than 2 dest dir" + ) + dest_paths = tuple((os.path.join(dp, "{basename}"), True) for dp in dest_dirs) + return GenericInstallationRule( + sources, + dest_paths, + into, + condition, + definition_source, + ) + + @classmethod + def install_doc( + cls, + sources: Sequence[FileSystemMatchRule], + dest_dir: Optional[str], + into: FrozenSet[BinaryPackage], + definition_source: str, + condition: Optional[ManifestCondition], + ) -> "InstallRule": + cond: ManifestCondition = 
_BUILD_DOCS_BDO + if condition is not None: + cond = ManifestCondition.all_of([cond, condition]) + dest_path_is_format = False + if dest_dir is None: + dest_dir = "usr/share/doc/{doc_main_package_name}/{basename}" + dest_path_is_format = True + + return GenericInstallationRule( + sources, + [(dest_dir, dest_path_is_format)], + into, + cond, + definition_source, + ) + + @classmethod + def install_doc_as( + cls, + source: FileSystemMatchRule, + dest_path: str, + into: FrozenSet[BinaryPackage], + definition_source: str, + condition: Optional[ManifestCondition], + ) -> "InstallRule": + cond: ManifestCondition = _BUILD_DOCS_BDO + if condition is not None: + cond = ManifestCondition.all_of([cond, condition]) + + return GenericInstallationRule( + [source], + [(dest_path, False)], + into, + cond, + definition_source, + require_single_match=True, + ) + + @classmethod + def install_examples( + cls, + sources: Sequence[FileSystemMatchRule], + into: FrozenSet[BinaryPackage], + definition_source: str, + condition: Optional[ManifestCondition], + ) -> "InstallRule": + cond: ManifestCondition = _BUILD_DOCS_BDO + if condition is not None: + cond = ManifestCondition.all_of([cond, condition]) + return GenericInstallationRule( + sources, + [("usr/share/doc/{doc_main_package_name}/examples/{basename}", True)], + into, + cond, + definition_source, + ) + + @classmethod + def install_man( + cls, + sources: Sequence[FileSystemMatchRule], + into: FrozenSet[BinaryPackage], + section: Optional[int], + language: Optional[str], + definition_source: str, + condition: Optional[ManifestCondition], + ) -> "InstallRule": + cond: ManifestCondition = _BUILD_DOCS_BDO + if condition is not None: + cond = ManifestCondition.all_of([cond, condition]) + + dest_path_computer = _dest_path_for_manpage( + section, language, definition_source + ) + + return GenericInstallationRule( + sources, + dest_path_computer, + into, + cond, + definition_source, + match_filter=lambda m: not m.is_file, + ) + + @classmethod + 
class PPFInstallRule(InstallRule):
    """Built-in install rule placing packager provided files into one package.

    The rule has no condition and does not consult the path matcher; every
    file in ``ppfs`` is copied to the destination computed by the file itself.
    """

    __slots__ = (
        "_ppfs",
        "_substitution",
        "_into",
    )

    def __init__(
        self,
        into: BinaryPackage,
        substitution: Substitution,
        ppfs: Sequence["PackagerProvidedFile"],
    ) -> None:
        """Create the rule.

        :param into: The package receiving the files.
        :param substitution: Stored on the rule; not used by ``perform_install``.
        :param ppfs: The packager provided files to install.
        """
        super().__init__(
            None,
            "<built-in; PPF install rule>",
        )
        self._into = into
        self._ppfs = ppfs
        self._substitution = substitution

    def perform_install(
        self,
        path_matcher: SourcePathMatcher,
        install_context: InstallRuleContext,
        condition_context: ConditionContext,
    ) -> None:
        """Insert every packager provided file below the package's ``fs_root``.

        Symlinks are followed and the mode comes from the default mode of the
        file's definition rather than from the file system.
        """
        target_root = install_context[self._into.name].fs_root
        for ppf in self._ppfs:
            source_path = ppf.path.fs_path
            dest_dir, basename = ppf.compute_dest()
            target_root.mkdirs(dest_dir).insert_file_from_fs_path(
                basename,
                source_path,
                follow_symlinks=True,
                use_fs_path_mode=False,
                mode=ppf.definition.default_mode,
            )
def perform_install(
    self,
    path_matcher: SourcePathMatcher,
    install_context: InstallRuleContext,
    condition_context: ConditionContext,
) -> None:
    """Apply the match rules of this discard rule (``DiscardRule``).

    When ``self._limit_to`` is non-empty, only the search directories whose
    ``fs_path`` is named there are considered, and naming a directory that is
    not among ``install_context.search_dirs`` is reported via ``_error``.
    Otherwise all search directories are used.  Every rule in
    ``self._fs_match_rules`` is then handed to ``self._match_pattern``.

    :param path_matcher: The matcher tracking reserved and discarded paths.
    :param install_context: Provides the known search directories.
    :param condition_context: Passed through to ``_match_pattern``.
    """
    # Discard rules install into no package; `_match_pattern` receives an
    # empty package set.
    into = frozenset()
    limit_to = self._limit_to
    if limit_to:
        wanted = {x.match_rule.path for x in limit_to}
        search_dirs = tuple(
            s for s in install_context.search_dirs if s.search_dir.fs_path in wanted
        )
        # Only the directories that were requested but not found are a
        # problem.  Comparing lengths (as was done previously) misfires on
        # duplicates, and the result of `set.difference` must be kept since
        # it does not modify the set in place.
        unknown = wanted.difference(s.search_dir.fs_path for s in search_dirs)
        if unknown:
            paths = ":".join(sorted(unknown))
            _error(
                f"The discard rule defined at {self._definition_source} mentions the following"
                f" search directories that were not known to debputy: {paths}."
                " Either the search dir is missing somewhere else or it should be removed from"
                " the discard rule."
            )
    else:
        search_dirs = install_context.search_dirs

    for fs_match_rule in self._fs_match_rules:
        self._match_pattern(
            path_matcher,
            fs_match_rule,
            condition_context,
            search_dirs,
            into,
        )
def create_tar_info(self, tar_fd: tarfile.TarFile) -> tarfile.TarInfo:
    """Build the ``TarInfo`` describing this member.

    Virtual entries get a fresh, zero-sized ``TarInfo`` of the member's path
    type.  Other entries are derived from ``self.fs_path`` via
    ``tar_fd.gettarinfo``.  In both cases the mode, ownership and mtime
    recorded on this member override whatever the file system reported; the
    mtime is truncated to whole seconds.

    :param tar_fd: The tar file the info is created for.
    :return: The populated ``TarInfo``.
    :raises ValueError: If ``gettarinfo`` rejects the path with a ``TypeError``
      or ``ValueError``.
    """
    info: tarfile.TarInfo
    if not self.is_virtual_entry:
        try:
            info = tar_fd.gettarinfo(name=self.fs_path, arcname=self.member_path)
        except (TypeError, ValueError) as e:
            raise ValueError(
                f"Unable to prepare tar info for {self.member_path}"
            ) from e
        # TODO: Eventually, we should be able to unconditionally rely on link_target. However,
        # until we got symlinks and hardlinks correctly done in the JSON generator, it will be
        # conditional for now.
        if self.link_target != "":
            info.linkpath = self.link_target
    else:
        assert self.path_type.can_be_virtual
        info = tar_fd.tarinfo(self.member_path)
        info.size = 0
        info.type = self.path_type.tarinfo_type
        info.linkpath = self.link_target

    info.mode = self.mode
    info.mtime = int(self.mtime)
    info.uname, info.uid = self.owner, self.uid
    info.gname, info.gid = self.group, self.gid
    return info
@classmethod
def parse_intermediate_manifest(cls, manifest_path: str) -> IntermediateManifest:
    """Load and sanity check an intermediate manifest.

    The manifest is a JSON list of member declarations, each converted with
    ``TarMember.from_dict``.  After loading, the ordering is validated: the
    first member must be the root directory ``./`` and every member must come
    after the directory that contains it.

    :param manifest_path: Path of the JSON file, or ``-`` to read from stdin.
    :return: The parsed members in manifest order.
    :raises ValueError: If the manifest is empty, does not start with ``./`` or
      lists a path before its parent directory.
    """
    if manifest_path == "-":
        # Read stdin directly: wrapping it in `with` would close the
        # process-wide stream once the manifest has been read.
        data = json.load(sys.stdin)
    else:
        with open(manifest_path, encoding="utf-8") as fd:
            data = json.load(fd)
    contents = [TarMember.from_dict(m) for m in data]

    if not contents:
        raise ValueError(
            "Empty manifest (note that the root directory should always be present)"
        )
    if contents[0].member_path != "./":
        raise ValueError('The first member must always be the root directory "./"')

    # Directories seen so far (without trailing slash); "." is the root.
    directories = {"."}
    for tar_member in contents:
        directory = _dirname(tar_member.member_path)
        if directory not in directories:
            raise ValueError(
                f'The path "{tar_member.member_path}" came before the directory it is in (or the path'
                f" is not a directory). Either way leads to a broken deb."
            )
        if tar_member.path_type == PathType.DIRECTORY:
            directories.add(tar_member.member_path.rstrip("/"))
    return contents
def output_intermediate_manifest_to_fd(
    fd: IO[str], members: Iterable[TarMember]
) -> None:
    """Serialize ``members`` as a JSON list onto ``fd``.

    Each member contributes the mapping returned by its ``to_manifest()``.

    :param fd: A text stream open for writing.
    :param members: The members to serialize, in output order.
    """
    manifest_entries = [entry.to_manifest() for entry in members]
    json.dump(manifest_entries, fd)
+ + Note as a special-case, if the original command is `env` then the first argument is included + as well, because it is assumed to be the real command. + + + >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter + >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b") + >>> python3.original_command + '/usr/bin/python3' + >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh") + >>> env_sh.original_command + '/usr/bin/env sh' + + :return: The original command in the #!-line + """ + raise NotImplementedError + + @property + def command_full_basename(self) -> str: + """The full basename of the command (with version) + + Note that for #!-lines that uses `env`, this will return the argument for `env` rather than + `env`. + + >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter + >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b") + >>> python3.command_full_basename + 'python3' + >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh") + >>> env_sh.command_full_basename + 'sh' + + :return: The full basename of the command. + """ + raise NotImplementedError + + @property + def command_stem(self) -> str: + """The basename of the command **without** version + + Note that for #!-lines that uses `env`, this will return the argument for `env` rather than + `env`. + + >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter + >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b") + >>> python3.command_stem + 'python' + >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh") + >>> env_sh.command_stem + 'sh' + >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3.12-dbg -b") + >>> python3.command_stem + 'python' + + :return: The basename of the command **without** version. 
+ """ + raise NotImplementedError + + @property + def interpreter_version(self) -> str: + """The version part of the basename + + Note that for #!-lines that uses `env`, this will return the argument for `env` rather than + `env`. + + >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter + >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b") + >>> python3.interpreter_version + '3' + >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh") + >>> env_sh.interpreter_version + '' + >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3.12-dbg -b") + >>> python3.interpreter_version + '3.12-dbg' + + :return: The version part of the command or the empty string if the command is versionless. + """ + raise NotImplementedError + + @property + def fixup_needed(self) -> bool: + """Whether the interpreter uses a non-canonical location + + >>> # Note: Normally, you would use `VirtualPath.interpreter()` instead for extracting the interpreter + >>> python3 = extract_shebang_interpreter(b"#! /usr/bin/python3 -b") + >>> python3.fixup_needed + False + >>> env_sh = extract_shebang_interpreter(b"#! /usr/bin/env sh") + >>> env_sh.fixup_needed + True + >>> ub_sh = extract_shebang_interpreter(b"#! /usr/bin/sh") + >>> ub_sh.fixup_needed + True + >>> sh = extract_shebang_interpreter(b"#! /bin/sh") + >>> sh.fixup_needed + False + + :return: True if this interpreter uses a non-canonical location.
+ """ + return False + + +@dataclasses.dataclass(slots=True, frozen=True) +class DetectedInterpreter(Interpreter): + original_command: str + command_full_basename: str + command_stem: str + interpreter_version: str + correct_command: Optional[str] = None + corrected_shebang_line: Optional[str] = None + + @property + def fixup_needed(self) -> bool: + return self.corrected_shebang_line is not None + + def replace_shebang_line(self, path: "VirtualPath") -> None: + new_shebang_line = self.corrected_shebang_line + assert new_shebang_line.startswith("#!") + if not new_shebang_line.endswith("\n"): + new_shebang_line += "\n" + parent_dir = path.parent_dir + assert parent_dir is not None + with path.open(byte_io=True) as rfd: + original_first_line = rfd.readline() + if not original_first_line.startswith(b"#!"): + raise ValueError( + f'The provided path "{path.path}" does not start with a shebang line!?' + ) + mtime = path.mtime + with path.replace_fs_path_content() as new_fs_path, open( + new_fs_path, "wb" + ) as wfd: + wfd.write(new_shebang_line.encode("utf-8")) + shutil.copyfileobj(rfd, wfd) + # Ensure the mtime is not updated (we do not count interpreter correction as a "change") + path.mtime = mtime + + +def extract_shebang_interpreter_from_file( + fd: IO[bytes], +) -> Optional[DetectedInterpreter]: + first_line = fd.readline(4096) + if b"\n" not in first_line: + # If there is no newline, then it is probably not a shebang line + return None + return extract_shebang_interpreter(first_line) + + +def extract_shebang_interpreter(first_line: bytes) -> Optional[DetectedInterpreter]: + m = _SHEBANG_RE.search(first_line) + if not m: + return None + raw_command = m.group(1).strip().decode("utf-8") + command_full_basename = m.group(2).strip().decode("utf-8") + endpos = m.end() + if command_full_basename == "env": + wm = _WORD.search(first_line, pos=m.end()) + if wm is not None: + command_full_basename = wm.group(1).decode("utf-8") + raw_command += " " + command_full_basename + 
endpos = wm.end() + command_stem = command_full_basename + vm = _STRIP_VERSION.search(command_full_basename) + if vm: + version = vm.group(1) + command_stem = command_full_basename[: -len(version)] + else: + version = "" + correct_command = _KNOWN_INTERPRETERS.get(command_stem) + if correct_command is not None and version != "": + correct_command += version + + if correct_command is not None and correct_command != raw_command: + trailing = first_line[endpos + 1 :].strip().decode("utf-8") + corrected_shebang_line = "#! " + correct_command + if trailing: + corrected_shebang_line += " " + trailing + else: + corrected_shebang_line = None + + return DetectedInterpreter( + raw_command, + command_full_basename, + command_stem, + version, + correct_command, + corrected_shebang_line, + ) diff --git a/src/debputy/linting/__init__.py b/src/debputy/linting/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/debputy/linting/__init__.py diff --git a/src/debputy/linting/lint_impl.py b/src/debputy/linting/lint_impl.py new file mode 100644 index 0000000..68be9d9 --- /dev/null +++ b/src/debputy/linting/lint_impl.py @@ -0,0 +1,322 @@ +import os +import stat +import sys +from typing import Optional, List, Union, NoReturn + +from lsprotocol.types import ( + CodeAction, + Command, + CodeActionParams, + CodeActionContext, + TextDocumentIdentifier, + TextEdit, + Position, + DiagnosticSeverity, +) + +from debputy.commands.debputy_cmd.context import CommandContext +from debputy.commands.debputy_cmd.output import _output_styling, OutputStylingBase +from debputy.linting.lint_util import ( + LINTER_POSITION_CODEC, + report_diagnostic, + LinterImpl, + LintReport, +) +from debputy.lsp.lsp_debian_changelog import _lint_debian_changelog +from debputy.lsp.lsp_debian_control import _lint_debian_control +from debputy.lsp.lsp_debian_copyright import _lint_debian_copyright +from debputy.lsp.lsp_debian_debputy_manifest import _lint_debian_debputy_manifest +from 
debputy.lsp.lsp_debian_rules import _lint_debian_rules +from debputy.lsp.quickfixes import provide_standard_quickfixes_from_diagnostics +from debputy.lsp.spellchecking import disable_spellchecking +from debputy.lsp.text_edit import ( + get_well_formatted_edit, + merge_sort_text_edits, + apply_text_edits, +) +from debputy.util import _warn, _error, _info + +LINTER_FORMATS = { + "debian/control": _lint_debian_control, + "debian/copyright": _lint_debian_copyright, + "debian/changelog": _lint_debian_changelog, + "debian/rules": _lint_debian_rules, + "debian/debputy.manifest": _lint_debian_debputy_manifest, +} + + +def perform_linting(context: CommandContext) -> None: + parsed_args = context.parsed_args + if not parsed_args.spellcheck: + disable_spellchecking() + linter_exit_code = parsed_args.linter_exit_code + lint_report = LintReport() + fo = _output_styling(context.parsed_args, sys.stdout) + for name_stem in LINTER_FORMATS: + filename = f"./{name_stem}" + if not os.path.isfile(filename): + continue + perform_linting_of_file( + fo, + filename, + name_stem, + context.parsed_args.auto_fix, + lint_report, + ) + if lint_report.diagnostics_without_severity: + _warn( + "Some diagnostics did not explicitly set severity. Please report the bug and include the output" + ) + if lint_report.diagnostic_errors: + _error( + "Some sub-linters reported issues. 
Please report the bug and include the output" + ) + + if os.path.isfile("debian/debputy.manifest"): + _info("Note: Due to a limitation in the linter, debian/debputy.manifest is") + _info("only **partially** checked by this command at the time of writing.") + _info("Please use `debputy check-manifest` for checking the manifest.") + + if linter_exit_code: + _exit_with_lint_code(lint_report) + + +def _exit_with_lint_code(lint_report: LintReport) -> NoReturn: + diagnostics_count = lint_report.diagnostics_count + if ( + diagnostics_count[DiagnosticSeverity.Error] + or diagnostics_count[DiagnosticSeverity.Warning] + ): + sys.exit(2) + sys.exit(0) + + +def perform_linting_of_file( + fo: OutputStylingBase, + filename: str, + file_format: str, + auto_fixing_enabled: bool, + lint_report: LintReport, +) -> None: + handler = LINTER_FORMATS.get(file_format) + if handler is None: + return + with open(filename, "rt", encoding="utf-8") as fd: + text = fd.read() + + if auto_fixing_enabled: + _auto_fix_run(fo, filename, text, handler, lint_report) + else: + _diagnostics_run(fo, filename, text, handler, lint_report) + + +def _auto_fix_run( + fo: OutputStylingBase, + filename: str, + text: str, + linter: LinterImpl, + lint_report: LintReport, +) -> None: + another_round = True + unfixed_diagnostics = [] + remaining_rounds = 10 + fixed_count = False + too_many_rounds = False + lines = text.splitlines(keepends=True) + current_issues = linter(filename, filename, lines, LINTER_POSITION_CODEC) + issue_count_start = len(current_issues) if current_issues else 0 + while another_round and current_issues: + another_round = False + last_fix_position = Position(0, 0) + unfixed_diagnostics.clear() + edits = [] + fixed_diagnostics = [] + for diagnostic in current_issues: + actions = provide_standard_quickfixes_from_diagnostics( + CodeActionParams( + TextDocumentIdentifier(filename), + diagnostic.range, + CodeActionContext( + [diagnostic], + ), + ) + ) + auto_fixing_edits = 
resolve_auto_fixer(filename, actions) + + if not auto_fixing_edits: + unfixed_diagnostics.append(diagnostic) + continue + + sorted_edits = merge_sort_text_edits( + [get_well_formatted_edit(e) for e in auto_fixing_edits], + ) + last_edit = sorted_edits[-1] + last_edit_pos = last_edit.range.start + if ( + last_edit_pos.line <= last_fix_position.line + or last_edit_pos.character < last_fix_position.character + ): + if not another_round: + + if remaining_rounds > 0: + remaining_rounds -= 1 + print( + "Detected overlapping edit; scheduling another edit round." + ) + another_round = True + else: + _warn( + "Too many overlapping edits; stopping after this round (circuit breaker)." + ) + too_many_rounds = True + continue + edits.extend(sorted_edits) + fixed_diagnostics.append(diagnostic) + + if another_round and not edits: + _error( + "Internal error: Detected an overlapping edit and yet had no edits to perform..." + ) + + fixed_count += len(fixed_diagnostics) + + text = apply_text_edits( + text, + lines, + edits, + ) + lines = text.splitlines(keepends=True) + + for diagnostic in fixed_diagnostics: + report_diagnostic( + fo, + filename, + diagnostic, + lines, + True, + True, + lint_report, + ) + current_issues = linter(filename, filename, lines, LINTER_POSITION_CODEC) + + if fixed_count: + output_filename = f"{filename}.tmp" + with open(output_filename, "wt", encoding="utf-8") as fd: + fd.write(text) + orig_mode = stat.S_IMODE(os.stat(filename).st_mode) + os.chmod(output_filename, orig_mode) + os.rename(output_filename, filename) + lines = text.splitlines(keepends=True) + remaining_issues = ( + linter(filename, filename, lines, LINTER_POSITION_CODEC) or [] + ) + else: + remaining_issues = current_issues or [] + + for diagnostic in remaining_issues: + report_diagnostic( + fo, + filename, + diagnostic, + lines, + False, + False, + lint_report, + ) + + print() + if fixed_count: + remaining_issues_count = len(remaining_issues) + print( + fo.colored( + f"Fixes applied to
{filename}: {fixed_count}." + f" Number of issues went from {issue_count_start} to {remaining_issues_count}", + fg="green", + style="bold", + ) + ) + elif remaining_issues: + print( + fo.colored( + f"None of the issues in {filename} could be fixed automatically. Sorry!", + fg="yellow", + bg="black", + style="bold", + ) + ) + else: + assert not current_issues + print( + fo.colored( + f"No issues detected in {filename}", + fg="green", + style="bold", + ) + ) + if too_many_rounds: + print( + fo.colored( + f"Not all fixes for issues in {filename} could be applied due to overlapping edits.", + fg="yellow", + bg="black", + style="bold", + ) + ) + print( + "Running once more may cause more fixes to be applied. However, you may be facing" + " pathological performance." + ) + + +def _diagnostics_run( + fo: OutputStylingBase, + filename: str, + text: str, + linter: LinterImpl, + lint_report: LintReport, +) -> None: + lines = text.splitlines(keepends=True) + issues = linter(filename, filename, lines, LINTER_POSITION_CODEC) or [] + for diagnostic in issues: + actions = provide_standard_quickfixes_from_diagnostics( + CodeActionParams( + TextDocumentIdentifier(filename), + diagnostic.range, + CodeActionContext( + [diagnostic], + ), + ) + ) + auto_fixer = resolve_auto_fixer(filename, actions) + has_auto_fixer = bool(auto_fixer) + + report_diagnostic( + fo, + filename, + diagnostic, + lines, + has_auto_fixer, + False, + lint_report, + ) + + +def resolve_auto_fixer( + document_ref: str, + actions: Optional[List[Union[Command, CodeAction]]], +) -> Optional[List[TextEdit]]: + if actions is None or len(actions) != 1: + return None + action = actions[0] + if not isinstance(action, CodeAction): + return None + workspace_edit = action.edit + if workspace_edit is None or action.command is not None: + return None + if ( + not workspace_edit.changes + or len(workspace_edit.changes) != 1 + or document_ref not in workspace_edit.changes + ): + return None + return 
workspace_edit.changes[document_ref] diff --git a/src/debputy/linting/lint_util.py b/src/debputy/linting/lint_util.py new file mode 100644 index 0000000..7cdb8b6 --- /dev/null +++ b/src/debputy/linting/lint_util.py @@ -0,0 +1,175 @@ +import dataclasses +from typing import List, Optional, Callable, Counter + +from lsprotocol.types import Position, Range, Diagnostic, DiagnosticSeverity + +from debputy.commands.debputy_cmd.output import OutputStylingBase +from debputy.util import _DEFAULT_LOGGER, _warn + +LinterImpl = Callable[ + [str, str, List[str], "LintCapablePositionCodec"], Optional[List[Diagnostic]] +] + + +@dataclasses.dataclass(slots=True) +class LintReport: + diagnostics_count: Counter[DiagnosticSeverity] = dataclasses.field( + default_factory=Counter + ) + diagnostics_without_severity: int = 0 + diagnostic_errors: int = 0 + fixed: int = 0 + fixable: int = 0 + + +class LinterPositionCodec: + + def client_num_units(self, chars: str): + return len(chars) + + def position_from_client_units( + self, lines: List[str], position: Position + ) -> Position: + + if len(lines) == 0: + return Position(0, 0) + if position.line >= len(lines): + return Position(len(lines) - 1, self.client_num_units(lines[-1])) + return position + + def position_to_client_units( + self, _lines: List[str], position: Position + ) -> Position: + return position + + def range_from_client_units(self, _lines: List[str], range: Range) -> Range: + return range + + def range_to_client_units(self, _lines: List[str], range: Range) -> Range: + return range + + +LINTER_POSITION_CODEC = LinterPositionCodec() + + +_SEVERITY2TAG = { + DiagnosticSeverity.Error: lambda fo: fo.colored( + "error", + fg="red", + bg="black", + style="bold", + ), + DiagnosticSeverity.Warning: lambda fo: fo.colored( + "warning", + fg="yellow", + bg="black", + style="bold", + ), + DiagnosticSeverity.Information: lambda fo: fo.colored( + "informational", + fg="blue", + bg="black", + style="bold", + ), + DiagnosticSeverity.Hint: 
lambda fo: fo.colored( + "pedantic", + fg="green", + bg="black", + style="bold", + ), +} + + +def _lines_to_print(range_: Range) -> int: + count = range_.end.line - range_.start.line + if range_.end.character > 0: + count += 1 + return count + + +def _highlight_range( + fo: OutputStylingBase, line: str, line_no: int, range_: Range +) -> str: + line_wo_nl = line.rstrip("\r\n") + start_pos = 0 + prefix = "" + suffix = "" + if line_no == range_.start.line: + start_pos = range_.start.character + prefix = line_wo_nl[0:start_pos] + if line_no == range_.end.line: + end_pos = range_.end.character + suffix = line_wo_nl[end_pos:] + else: + end_pos = len(line_wo_nl) + + marked_part = fo.colored(line_wo_nl[start_pos:end_pos], fg="red", style="bold") + + return prefix + marked_part + suffix + + +def report_diagnostic( + fo: OutputStylingBase, + filename: str, + diagnostic: Diagnostic, + lines: List[str], + auto_fixable: bool, + auto_fixed: bool, + lint_report: LintReport, +) -> None: + logger = _DEFAULT_LOGGER + assert logger is not None + severity = diagnostic.severity + missing_severity = False + if severity is None: + severity = DiagnosticSeverity.Warning + missing_severity = True + if not auto_fixed: + tag_unresolved = _SEVERITY2TAG.get(severity) + if tag_unresolved is None: + tag_unresolved = _SEVERITY2TAG[DiagnosticSeverity.Warning] + lint_report.diagnostics_without_severity += 1 + else: + lint_report.diagnostics_count[severity] += 1 + tag = tag_unresolved(fo) + else: + tag = fo.colored( + "auto-fixing", + fg="magenta", + bg="black", + style="bold", + ) + start_line = diagnostic.range.start.line + start_position = diagnostic.range.start.character + end_line = diagnostic.range.end.line + end_position = diagnostic.range.end.character + has_fixit = "" + line_no_width = len(str(len(lines))) + if not auto_fixed and auto_fixable: + has_fixit = " [Correctable via --auto-fix]" + lint_report.fixable += 1 + print( + f"{tag}: File: 
{filename}:{start_line+1}:{start_position}:{end_line+1}:{end_position}: {diagnostic.message}{has_fixit}", + ) + if missing_severity: + _warn( + " This warning did not have an explicit severity; Used Warning as a fallback!" + ) + if auto_fixed: + # If it is fixed, there is no reason to show additional context. + lint_report.fixed += 1 + return + lines_to_print = _lines_to_print(diagnostic.range) + if diagnostic.range.end.line >= len(lines) or diagnostic.range.start.line < 1: + lint_report.diagnostic_errors += 1 + _warn( + "Bug in the underlying linter: The line numbers of the warning does not fit in the file..." + ) + return + if lines_to_print == 1: + line = _highlight_range(fo, lines[start_line], start_line, diagnostic.range) + print(f" {start_line+1:{line_no_width}}: {line}") + else: + for line_no in range(start_line, end_line): + line = _highlight_range(fo, lines[line_no], line_no, diagnostic.range) + print(f" {line_no+1:{line_no_width}}: {line}") diff --git a/src/debputy/lsp/__init__.py b/src/debputy/lsp/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/debputy/lsp/__init__.py diff --git a/src/debputy/lsp/debian-wordlist.dic b/src/debputy/lsp/debian-wordlist.dic new file mode 100644 index 0000000..11e0438 --- /dev/null +++ b/src/debputy/lsp/debian-wordlist.dic @@ -0,0 +1,333 @@ +_darcs +abi +abs2rel +addon +add-on +addons +add-ons +alioth +api +archs +args +awk +autoconf +automake +autopkgtest +autopkgtests +autoreconf +backport +backportable +backporter +backporters +backporting +backports +bashism +bashisms +basename +bhlc +binNMU +binNMUs +binutils +build-dep +build-deps +buildd +buildds +buildflags +buildsystem +buildsystems +bz2 +bzip2 +ccache +CDBS +cdbs +cdebconf +CFLAGS +changelog +changelogs +chdir +chfn +chmod +chown +chroot +chsh +cli +cvs +cmake +compat +conffile +conffiles +config +cowbuilder +CPPFLAGS +cpu +cron +crond +CSV +cwd +CXXFLAGS +dbg +dbgsym +dbgsyms +db_purge +dch +deb +deb822 +debcrossgen +debs +debstd 
+debconf +dest +destdir +dest_dir +debhelper +debhelper's +debian +Debian +debputy +Dh_Lib +dfsg +dh +dirs +dirname +distclean +doxygen +dpkg +dpkg's +du +dwz +egrep +elif +elsif +emacs +emacsen +enum +env +envvar +eval +fakeroot +fanotify +fd +fds +fgrep +FHS +filehandle +filehandles +filesystem +filesystems +freebsd +frontend +frontends +FTBFS +FTCBFS +gconf2 +gdb +getopt +GitLab +GitHub +glob +globs +globbing +grep +gunzip +gzip +hardlink +hardlinked +hardlinks +htm +html +HTML +html2text +Indep +indep +idempotent +idempotency +initramfs +inotify +isinstallable +ispell +jpeg +jpg +json +JSON +journalctl +journald +kfreebsd +ksh +ld +ldconfig +LDFLAGS +levenshtein +libexec +libtool +libtoolize +linter +linters +linting +lintian +linux +lua +https +maintscript +maintscripts +makefile +makefiles +manpage +manpages +md5sum +md5sums +menutest +mkdir +mkdirs +mkfontdir +movetousr +mtime +multi-arch +Multi-Arch +multiarch-support +noautodbgsym +noawait +nocheck +nodoc +nohup +noop +noudeb +numpy +numpy3 +objcopy +objdump +OCaml +ok +oldoldstable +oldstable +openssl +param +params +parentdir +parent_dir +passwd +pbuilder +perl +perl5 +pkgfile +pkgfiles +png +preinst +prerm +po4a +po-debconf +pod2man +POSIX +postinst +postrm +Pre-Depends +pwd +py +pyc +pyo +python3 +Python3 +qmake +qmake5 +qmake6 +qt5-qmake +qt6-qmake +rc +rcbug +rcbugs +readlink +realpath +readme +reportbug +rm +rmdir +rpath +R³ +sbuild +sed +setgid +setuid +sha1sum +sha256sum +sha512sum +shlibs +SONAME +SONAMEs +sbin +scrollkeeper +sourcedir +sourcedirs +ssl +stacktrace +stderr +stdin +stdout +subcommand +subcommands +subdir +subdirs +subprocess +subprocesses +subst +substring +substvar +substvars +suid +suidmanager +suidregister +svg +svgz +svn +symlink +symlinked +symlinks +systemctl +systemd +sysusers +sysvinit +t64 +temp +tempdir +tempdirs +tempfile +tempfiles +tls +tmp +tmpfiles +TODO +toml +tomli +TOML +ucf +ucfr +udeb +udebs +udev +uid +umask +undef +uploaders +upstreams +url +urls +URI +URIs 
+uri +uris +utf-7 +utf-8 +utf-16 +utf-32 +util +utils +usr +vcs +Vcs +wishlist +wm +YAML +yaml +yml +xargs +xml +xz +zsh diff --git a/src/debputy/lsp/logins-and-people.dic b/src/debputy/lsp/logins-and-people.dic new file mode 100644 index 0000000..a7c468b --- /dev/null +++ b/src/debputy/lsp/logins-and-people.dic @@ -0,0 +1,278 @@ + +Aboubakr +Aj +Alessandro +Allbery +Allombert +Alteholz +Américo +Andreas +Andrej +Andrius +Ansgar +Aoki +aph +Appaiah +Aurelien +Axel +Bacher +Badreddin +Banck +Basak +Bastian +Bastien +Basto +Bdale +Beckert +Bengen +Bernd +Bicha +Biebl +Biedl +Bigonville +Bobbio +Bogatov +Bothamy +Boulenguez +Bourg +Boyuan +Braakman +Braud-Santoni +Brederlow +Briscoe-Smith +Brulebois +Burchardt +Byrum +Campagne +Carraway +Cascadian +Changwoo +Christianson +Christoph +cjwatson +Costamagna +Cowgill +Damián +Damir +Didier +Dirson +d'Itri +Dmitry +Dorey +Dorland +Drieu +Durigan +Düsterhus +D'Vine +Dzeko +Eduard +Eisentraut +elbrus +Emel +Engel +Engelhard +Escalante +Evgeni +Fabio +Falavigna +Ferenc +Florian +Frédéric +Fumitoshi +Garbee +Garside +Geissert +Gergely +Gevers +Geyer +Ghe +Ghedini +gilbey +Gillmor +Glondu +Godoy +Golov +Goswin +Göttsche +Grassi +Greffrath +gregor +Grobman +Groenen +Grohne +Guerreiro +Guilhem +guillem +Harald +Hasdal +Hasenack +helmutg +Henriksson +Hernández-Novich +herrmann +Hideki +Hikory +Hilko +Hiroyuki +Hofstaedtler +Holbach +Hommey +Hutchings +Iain +Jakub +Jammet +Jarno +Jelmer +Jens +Jeroen +Jochen +Jordi +Jorgen +Josip +Josselin +Jover +Kastner +Kel +Kis +Kitover +Kitt +Klode +Klose +Knauß +Koeppe +Koschany +Krall +Kumar +Laboissiere +Langasek +Leick +Leidert +Lisandro +Loïc +Luberda +Luca +Lyubimkin +Mallach +Marcin +Marillat +Markus +Martin-Éric +Masanori +Masato +Matej +Mattia +Maximiliano +Mennucc +Merkys +Metzler +Mihai +Miklautz +Minier +Modderman +Modestas +Monfort +Monteiro +Moritz +Mouette +Moulder +Muehlenhoff +Nadav +Nicanor +Niels +Niko +O'Dea +Ondřej +Osamu +Overfiend +Owsiany +Ożarowski +Pahula +Paillard 
+Pappacoda +Pentchev +Pérez +Pfannenstein +Philipp +Pikulski +Piotr +Plessy +Porras +Possas +Pozuelo +Praveen +Prévot +Raboud +Ragwitz +Raphaël +Reiner +Reyer +Rivero +Rizzolo +Robie +Roeckx +Röhling +rra +Rubén +Ruderich +Ryu +Sandro +Sanou +Sascha +Sateler +Schaefer +Schauer +Schepler +Schertler +Schmelcher +Schot +Schrieffer +Sebastien +Sébastien +Sérgio +Seyeong +Shachnev +Shadura +smcv McVittie +Smedegaard +Sprickerhof +Stapelberg +Steigies +Steinbiss +Stephane +Stéphane +Stribblehill +Suffield +Surý +Tagliamonte +Tambre +Tandy +Taruishi +Theppitak +Thom +Thorsten +Thykier +Tille +Timo +Tranchitella +Triplett +Troup +Ts'o +Tyni +Vainius +Valéry +Verhelst +Vernooij +Villemot +von +Wágner +Wakko +Welte +wferi +Whitton +Wilk +Wouter +Yamane +Yann +zeha +Zeimetz +Zinoviev diff --git a/src/debputy/lsp/lsp_debian_changelog.py b/src/debputy/lsp/lsp_debian_changelog.py new file mode 100644 index 0000000..3ec0b4d --- /dev/null +++ b/src/debputy/lsp/lsp_debian_changelog.py @@ -0,0 +1,186 @@ +import sys +from typing import ( + Union, + List, + Dict, + Iterator, + Optional, + Iterable, +) + +from lsprotocol.types import ( + Diagnostic, + DidOpenTextDocumentParams, + DidChangeTextDocumentParams, + TEXT_DOCUMENT_DID_OPEN, + TEXT_DOCUMENT_DID_CHANGE, + TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL, + TEXT_DOCUMENT_CODE_ACTION, + TEXT_DOCUMENT_DID_CLOSE, + DidCloseTextDocumentParams, + Range, + Position, + DiagnosticSeverity, +) + +from debputy.lsp.lsp_features import lsp_diagnostics, lsp_standard_handler +from debputy.lsp.quickfixes import ( + provide_standard_quickfixes_from_diagnostics, +) +from debputy.lsp.spellchecking import spellcheck_line +from debputy.lsp.text_util import ( + on_save_trim_end_of_line_whitespace, + LintCapablePositionCodec, +) + +try: + from debian._deb822_repro.locatable import Position as TEPosition, Range as TERange + + from pygls.server import LanguageServer + from pygls.workspace import TextDocument +except ImportError: + pass + + +# Same as Lintian
+_MAXIMUM_WIDTH: int = 82 +_LANGUAGE_IDS = [ + "debian/changelog", + # emacs's name + "debian-changelog", + # vim's name + "debchangelog", +] + +DOCUMENT_VERSION_TABLE: Dict[str, int] = {} + + +def register_dch_lsp(ls: "LanguageServer") -> None: + ls.feature(TEXT_DOCUMENT_DID_OPEN)(_diagnostics_debian_changelog) + ls.feature(TEXT_DOCUMENT_DID_CHANGE)(_diagnostics_debian_changelog) + ls.feature(TEXT_DOCUMENT_DID_CLOSE)(_handle_close) + ls.feature(TEXT_DOCUMENT_CODE_ACTION)( + ls.thread()(provide_standard_quickfixes_from_diagnostics) + ) + ls.feature(TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)(on_save_trim_end_of_line_whitespace) + + +def _handle_close( + ls: "LanguageServer", + params: DidCloseTextDocumentParams, +) -> None: + try: + del DOCUMENT_VERSION_TABLE[params.text_document.uri] + except KeyError: + pass + + +def is_doc_at_version(uri: str, version: int) -> bool: + dv = DOCUMENT_VERSION_TABLE.get(uri) + return dv == version + + +lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_CODE_ACTION) +lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL) + + +@lsp_diagnostics(_LANGUAGE_IDS) +def _diagnostics_debian_changelog( + ls: "LanguageServer", + params: Union[DidOpenTextDocumentParams, DidChangeTextDocumentParams], +) -> Iterable[List[Diagnostic]]: + doc_uri = params.text_document.uri + doc = ls.workspace.get_text_document(doc_uri) + lines = doc.lines + max_words = 1_000 + delta_update_size = 10 + max_lines_between_update = 10 + scanner = _scan_debian_changelog_for_diagnostics( + lines, + doc.position_codec, + delta_update_size, + max_words, + max_lines_between_update, + ) + + yield from scanner + + +def _scan_debian_changelog_for_diagnostics( + lines: List[str], + position_codec: LintCapablePositionCodec, + delta_update_size: int, + max_words: int, + max_lines_between_update: int, + *, + max_line_length: int = _MAXIMUM_WIDTH, +) -> Iterator[List[Diagnostic]]: + diagnostics = [] + diagnostics_at_last_update = 0 + lines_since_last_update = 0 + for 
line_no, line in enumerate(lines): + orig_line = line + line = line.rstrip() + if not line: + continue + if not line.startswith(" "): + continue + # minus 1 for newline + orig_line_len = len(orig_line) - 1 + if orig_line_len > max_line_length: + range_server_units = Range( + Position( + line_no, + max_line_length, + ), + Position( + line_no, + orig_line_len, + ), + ) + diagnostics.append( + Diagnostic( + position_codec.range_to_client_units(lines, range_server_units), + f"Line exceeds {max_line_length} characters", + severity=DiagnosticSeverity.Hint, + source="debputy", + ) + ) + if len(line) > 3 and line[2] == "[" and line[-1] == "]": + # Do not spell check [ X ] as X is usually a name + continue + lines_since_last_update += 1 + if max_words > 0: + typos = list(spellcheck_line(lines, position_codec, line_no, line)) + new_diagnostics = len(typos) + max_words -= new_diagnostics + diagnostics.extend(typos) + + current_diagnostics_len = len(diagnostics) + if ( + lines_since_last_update >= max_lines_between_update + or current_diagnostics_len - diagnostics_at_last_update > delta_update_size + ): + diagnostics_at_last_update = current_diagnostics_len + lines_since_last_update = 0 + + yield diagnostics + if not diagnostics or diagnostics_at_last_update != len(diagnostics): + yield diagnostics + + +def _lint_debian_changelog( + _doc_reference: str, + _path: str, + lines: List[str], + position_codec: LintCapablePositionCodec, +) -> Optional[List[Diagnostic]]: + limits = sys.maxsize + scanner = _scan_debian_changelog_for_diagnostics( + lines, + position_codec, + limits, + limits, + limits, + ) + return next(iter(scanner), None) diff --git a/src/debputy/lsp/lsp_debian_control.py b/src/debputy/lsp/lsp_debian_control.py new file mode 100644 index 0000000..d00f1c2 --- /dev/null +++ b/src/debputy/lsp/lsp_debian_control.py @@ -0,0 +1,797 @@ +from typing import ( + Union, + Sequence, + Tuple, + Iterator, + Optional, + Iterable, + Mapping, + List, +) + +from 
debputy.lsp.vendoring._deb822_repro import ( + parse_deb822_file, + Deb822FileElement, + Deb822ParagraphElement, +) +from debputy.lsp.vendoring._deb822_repro.parsing import ( + Deb822KeyValuePairElement, + LIST_SPACE_SEPARATED_INTERPRETATION, +) +from debputy.lsp.vendoring._deb822_repro.tokens import ( + Deb822Token, + tokenize_deb822_file, + Deb822FieldNameToken, +) +from lsprotocol.types import ( + DiagnosticSeverity, + Range, + Diagnostic, + Position, + DidOpenTextDocumentParams, + DidChangeTextDocumentParams, + FoldingRangeKind, + FoldingRange, + FoldingRangeParams, + CompletionItem, + CompletionList, + CompletionParams, + TEXT_DOCUMENT_DID_OPEN, + TEXT_DOCUMENT_DID_CHANGE, + TEXT_DOCUMENT_FOLDING_RANGE, + TEXT_DOCUMENT_COMPLETION, + TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL, + DiagnosticRelatedInformation, + Location, + TEXT_DOCUMENT_HOVER, + HoverParams, + Hover, + TEXT_DOCUMENT_CODE_ACTION, + DiagnosticTag, + SemanticTokensLegend, + TEXT_DOCUMENT_SEMANTIC_TOKENS_FULL, + SemanticTokens, + SemanticTokensParams, +) + +from debputy.lsp.lsp_debian_control_reference_data import ( + DctrlKnownField, + BINARY_FIELDS, + SOURCE_FIELDS, + FieldValueClass, + DctrlFileMetadata, +) +from debputy.lsp.lsp_features import ( + lint_diagnostics, + lsp_completer, + lsp_hover, + lsp_standard_handler, +) +from debputy.lsp.lsp_generic_deb822 import deb822_completer, deb822_hover +from debputy.lsp.quickfixes import ( + propose_remove_line_quick_fix, + range_compatible_with_remove_line_fix, + propose_correct_text_quick_fix, + provide_standard_quickfixes_from_diagnostics, +) +from debputy.lsp.spellchecking import default_spellchecker +from debputy.lsp.text_util import ( + on_save_trim_end_of_line_whitespace, + normalize_dctrl_field_name, + LintCapablePositionCodec, + detect_possible_typo, + te_range_to_lsp, +) +from debputy.util import _info, _error + +try: + from debputy.lsp.vendoring._deb822_repro.locatable import ( + Position as TEPosition, + Range as TERange, + START_POSITION, + ) + + 
def _detect_folding_ranges_debian_control(
    ls: "LanguageServer",
    params: FoldingRangeParams,
) -> Optional[Sequence[FoldingRange]]:
    """Compute comment folding ranges for a debian/control document.

    Every run of consecutive comment tokens becomes one ``FoldingRange`` of
    kind ``Comment`` covering the first through the last line of the run.

    :param ls: Language server used to look up the text document.
    :param params: The folding range request; only the document URI is read.
    :return: The detected folding ranges in document order (possibly empty).
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    folding_ranges = []
    comment_start = -1
    comment_end = -1

    def _emit(first_line: int, last_line: int) -> None:
        folding_range = FoldingRange(
            first_line,
            last_line,
            kind=FoldingRangeKind.Comment,
        )
        folding_ranges.append(folding_range)
        _info(f"Detected folding range: {folding_range}")

    for (
        token,
        start_line,
        _start_offset,
        end_line,
        end_offset,
    ) in _deb822_token_iter(tokenize_deb822_file(doc.lines)):
        if token.is_comment:
            if comment_start < 0:
                comment_start = start_line
                _info(f"Detected new comment: {start_line}")
            # A token ending in a newline is reported as ending at column 0
            # of the *following* line; the comment itself stops one line
            # earlier.
            if end_offset == 0 and end_line > start_line:
                comment_end = end_line - 1
            else:
                comment_end = end_line
        elif comment_start > -1:
            # Emit before resetting the marker; resetting first would make
            # every range start at line -1.
            _emit(comment_start, comment_end)
            comment_start = -1

    if comment_start > -1:
        # The document ended while still inside a comment run.
        _emit(comment_start, comment_end)

    return folding_ranges
section_value_loc.range_in_parent().relative_to( + value_element_pos + ) + section_range_server_units = te_range_to_lsp(value_range_te) + section_range = position_codec.range_to_client_units( + lines, section_range_server_units + ) + return v, section_range + return None, None + + +def _binary_package_checks( + stanza: Deb822ParagraphElement, + stanza_position: "TEPosition", + source_stanza: Deb822ParagraphElement, + representation_field_range: Range, + position_codec: "LintCapablePositionCodec", + lines: List[str], + diagnostics: List[Diagnostic], +) -> None: + ma_kvpair = stanza.get_kvpair_element("Multi-Arch", use_get=True) + arch = stanza.get("Architecture", "any") + if arch == "all" and ma_kvpair is not None: + ma_value, ma_value_range = _extract_first_value_and_position( + ma_kvpair, + stanza_position, + position_codec, + lines, + ) + if ma_value == "same": + diagnostics.append( + Diagnostic( + ma_value_range, + "Multi-Arch: same is not valid for Architecture: all packages. Maybe you want foreign?", + severity=DiagnosticSeverity.Error, + source="debputy", + ) + ) + + package_name = stanza.get("Package", "") + source_section = source_stanza.get("Section") + section_kvpair = stanza.get_kvpair_element("Section", use_get=True) + section: Optional[str] = None + if section_kvpair is not None: + section, section_range = _extract_first_value_and_position( + section_kvpair, + stanza_position, + position_codec, + lines, + ) + else: + section_range = representation_field_range + effective_section = section or source_section or "unknown" + package_type = stanza.get("Package-Type", "") + component_prefix = "" + if "/" in effective_section: + component_prefix, effective_section = effective_section.split("/", maxsplit=1) + component_prefix += "/" + + if package_name.endswith("-udeb") or package_type == "udeb": + if package_type != "udeb": + package_type_kvpair = stanza.get_kvpair_element( + "Package-Type", use_get=True + ) + package_type_range = None + if 
def _diagnostics_for_paragraph(
    stanza: Deb822ParagraphElement,
    stanza_position: "TEPosition",
    source_stanza: Deb822ParagraphElement,
    known_fields: Mapping[str, DctrlKnownField],
    other_known_fields: Mapping[str, DctrlKnownField],
    is_binary_paragraph: bool,
    doc_reference: str,
    position_codec: "LintCapablePositionCodec",
    lines: List[str],
    diagnostics: List[Diagnostic],
) -> None:
    """Append all stanza-level diagnostics for one paragraph.

    The checks are: missing mandatory fields, binary package checks,
    misspelled or misplaced field names, empty values, per-field diagnostics,
    spelling of free-text values, redundant default values, values duplicated
    from the Source stanza, and fields used more than once.

    :param stanza: The paragraph being checked.
    :param stanza_position: Position of the paragraph in the file.
    :param source_stanza: The first paragraph of the file.
    :param known_fields: Fields valid in this kind of stanza, keyed by
      normalized lower-cased name.
    :param other_known_fields: Fields valid in the other kind of stanza.
    :param is_binary_paragraph: Whether this is a binary package stanza.
    :param doc_reference: Document URI used for related-information links.
    :param position_codec: Codec converting server ranges to client units.
    :param lines: The lines of the document.
    :param diagnostics: Output list; diagnostics are appended in place.
    """
    representation_field = _paragraph_representation_field(stanza)
    representation_field_pos = representation_field.position_in_parent().relative_to(
        stanza_position
    )
    representation_field_range_server_units = te_range_to_lsp(
        TERange.from_position_and_size(
            representation_field_pos, representation_field.size()
        )
    )
    # Diagnostics that are not tied to a concrete field are attached here.
    representation_field_range = position_codec.range_to_client_units(
        lines,
        representation_field_range_server_units,
    )
    for known_field in known_fields.values():
        missing_field_severity = known_field.missing_field_severity
        if missing_field_severity is None or known_field.name in stanza:
            continue

        if known_field.inherits_from_source and known_field.name in source_stanza:
            continue

        diagnostics.append(
            Diagnostic(
                representation_field_range,
                f"Stanza is missing field {known_field.name}",
                severity=missing_field_severity,
                source="debputy",
            )
        )

    if is_binary_paragraph:
        _binary_package_checks(
            stanza,
            stanza_position,
            source_stanza,
            representation_field_range,
            position_codec,
            lines,
            diagnostics,
        )

    # normalized lower-cased name ->
    #   (first spelling, normalized spelling, first range, duplicate ranges)
    seen_fields = {}

    for kvpair in stanza.iter_parts_of_type(Deb822KeyValuePairElement):
        field_name_token = kvpair.field_token
        field_name = field_name_token.text
        field_name_lc = field_name.lower()
        normalized_field_name_lc = normalize_dctrl_field_name(field_name_lc)
        known_field = known_fields.get(normalized_field_name_lc)
        field_value = stanza[field_name]
        field_range_te = kvpair.range_in_parent().relative_to(stanza_position)
        field_position_te = field_range_te.start_pos
        field_range_server_units = te_range_to_lsp(field_range_te)
        field_range = position_codec.range_to_client_units(
            lines,
            field_range_server_units,
        )
        field_name_typo_detected = False
        existing_field_range = seen_fields.get(normalized_field_name_lc)
        if existing_field_range is not None:
            existing_field_range[3].append(field_range)
        else:
            normalized_field_name = normalize_dctrl_field_name(field_name)
            # Store under the same key that the lookup above uses. Keying on
            # the raw lower-cased name made "XB-Foo" followed by "Foo" go
            # undetected, while the reverse order was reported.
            seen_fields[normalized_field_name_lc] = (
                field_name,
                normalized_field_name,
                field_range,
                [],
            )

        if known_field is None:
            candidates = detect_possible_typo(normalized_field_name_lc, known_fields)
            if candidates:
                known_field = known_fields[candidates[0]]
                token_range_server_units = te_range_to_lsp(
                    TERange.from_position_and_size(
                        field_position_te, kvpair.field_token.size()
                    )
                )
                # From here on, diagnostics for this field point at the field
                # name only rather than the whole field.
                field_range = position_codec.range_to_client_units(
                    lines,
                    token_range_server_units,
                )
                field_name_typo_detected = True
                diagnostics.append(
                    Diagnostic(
                        field_range,
                        f'The "{field_name}" looks like a typo of "{known_field.name}".',
                        severity=DiagnosticSeverity.Warning,
                        source="debputy",
                        data=[
                            propose_correct_text_quick_fix(known_fields[m].name)
                            for m in candidates
                        ],
                    )
                )
        if known_field is None:
            known_else_where = other_known_fields.get(normalized_field_name_lc)
            if known_else_where is not None:
                intended_usage = "Source" if is_binary_paragraph else "Package"
                diagnostics.append(
                    Diagnostic(
                        field_range,
                        f'The {field_name} is defined for use in the "{intended_usage}" stanza.'
                        f" Please move it to the right place or remove it",
                        severity=DiagnosticSeverity.Error,
                        source="debputy",
                    )
                )
            # Nothing more can be said about a field we know nothing about.
            continue

        if field_value.strip() == "":
            diagnostics.append(
                Diagnostic(
                    field_range,
                    f"The {field_name} has no value. Either provide a value or remove it.",
                    severity=DiagnosticSeverity.Error,
                    source="debputy",
                )
            )
            continue
        diagnostics.extend(
            known_field.field_diagnostics(
                kvpair,
                stanza_position,
                position_codec,
                lines,
                field_name_typo_reported=field_name_typo_detected,
            )
        )
        if known_field.spellcheck_value:
            words = kvpair.interpret_as(LIST_SPACE_SEPARATED_INTERPRETATION)
            spell_checker = default_spellchecker()
            value_position = kvpair.value_element.position_in_parent().relative_to(
                field_position_te
            )
            for word_ref in words.iter_value_references():
                token = word_ref.value
                for word, pos, endpos in spell_checker.iter_words(token):
                    corrections = spell_checker.provide_corrections_for(word)
                    if not corrections:
                        continue
                    word_loc = word_ref.locatable
                    word_pos_te = word_loc.position_in_parent().relative_to(
                        value_position
                    )
                    if pos:
                        # The misspelled word starts inside the token.
                        word_pos_te = TEPosition(0, pos).relative_to(word_pos_te)
                    word_range = TERange(
                        START_POSITION,
                        TEPosition(0, endpos - pos),
                    )
                    word_range_server_units = te_range_to_lsp(
                        TERange.from_position_and_size(word_pos_te, word_range)
                    )
                    word_range = position_codec.range_to_client_units(
                        lines,
                        word_range_server_units,
                    )
                    diagnostics.append(
                        Diagnostic(
                            word_range,
                            f'Spelling "{word}"',
                            severity=DiagnosticSeverity.Hint,
                            source="debputy",
                            data=[
                                propose_correct_text_quick_fix(c) for c in corrections
                            ],
                        )
                    )
        source_value = source_stanza.get(field_name)
        if known_field.warn_if_default and field_value == known_field.default_value:
            diagnostics.append(
                Diagnostic(
                    field_range,
                    f"The {field_name} is redundant as it is set to the default value and the field should only be"
                    " used in exceptional cases.",
                    severity=DiagnosticSeverity.Warning,
                    source="debputy",
                )
            )

        if known_field.inherits_from_source and field_value == source_value:
            if range_compatible_with_remove_line_fix(field_range):
                fix_data = propose_remove_line_quick_fix()
            else:
                fix_data = None
            diagnostics.append(
                Diagnostic(
                    field_range,
                    f"The field {field_name} duplicates the value from the Source stanza.",
                    severity=DiagnosticSeverity.Information,
                    source="debputy",
                    data=fix_data,
                )
            )
    for (
        field_name,
        normalized_field_name,
        field_range,
        duplicates,
    ) in seen_fields.values():
        if not duplicates:
            continue
        related_information = [
            DiagnosticRelatedInformation(
                location=Location(doc_reference, field_range),
                message=f"First definition of {field_name}",
            )
        ]
        related_information.extend(
            DiagnosticRelatedInformation(
                location=Location(doc_reference, r),
                message=f"Duplicate of {field_name}",
            )
            for r in duplicates
        )
        for dup_range in duplicates:
            diagnostics.append(
                Diagnostic(
                    dup_range,
                    f"The {normalized_field_name} field name was used multiple times in this stanza."
                    f" Please ensure the field is only used once per stanza. Note that {normalized_field_name} and"
                    f" X[BCS]-{normalized_field_name} are considered the same field.",
                    severity=DiagnosticSeverity.Error,
                    source="debputy",
                    related_information=related_information,
                )
            )
def _diagnostics_debian_control(
    ls: "LanguageServer",
    params: Union[DidOpenTextDocumentParams, DidChangeTextDocumentParams],
) -> None:
    """Lint the document named in *params* and publish the diagnostics.

    :param ls: Language server owning the workspace and the client link.
    :param params: The did-open or did-change notification parameters; only
      the document URI is read.
    """
    document = ls.workspace.get_text_document(params.text_document.uri)
    _info(f"Opened document: {document.path} ({document.language_id})")
    doc_lines = document.lines
    codec: LintCapablePositionCodec = document.position_codec

    ls.publish_diagnostics(
        document.uri,
        _lint_debian_control(document.uri, document.path, doc_lines, codec),
    )
def _handle_semantic_tokens_full(
    ls: "LanguageServer",
    request: SemanticTokensParams,
) -> Optional[SemanticTokens]:
    """Mark every field name in the document as a ``keyword`` token.

    Each field contributes five integers to the result: line delta to the
    previous token, start column delta (always 0), token length in client
    units, token type index and token modifier bits.

    :param ls: Language server used to look up the text document.
    :param request: The semantic tokens request; only the URI is read.
    :return: The encoded tokens, or ``None`` if the document has no fields.
    """
    doc = ls.workspace.get_text_document(request.text_document.uri)
    deb822_file = parse_deb822_file(
        doc.lines,
        accept_files_with_duplicated_fields=True,
        accept_files_with_error_tokens=True,
    )
    # Index into SEMANTIC_TOKENS_LEGEND.token_types ("keyword").
    keyword_token = 0
    no_modifiers = 0
    encoded = []
    last_line = 0

    for paragraph in deb822_file:
        paragraph_position = paragraph.position_in_file()
        for kvpair in paragraph.iter_parts_of_type(Deb822KeyValuePairElement):
            field_position_without_comments = kvpair.position_in_parent().relative_to(
                paragraph_position
            )
            field_size = doc.position_codec.client_num_units(kvpair.field_name)
            this_line = field_position_without_comments.line_position
            encoded.extend(
                (
                    this_line - last_line,  # Line delta
                    0,  # Token delta
                    field_size,  # Token length
                    keyword_token,
                    no_modifiers,
                )
            )
            last_line = this_line

    return SemanticTokens(encoded) if encoded else None
def _allowed_values(*values: Union[str, Keyword]) -> Mapping[str, Keyword]:
    """Build a value -> ``Keyword`` lookup table.

    Plain strings are wrapped in a ``Keyword`` with default attributes;
    ``Keyword`` instances are used as-is. If the same value occurs more than
    once, the last occurrence wins.

    :param values: The allowed values, as strings or ``Keyword`` instances.
    :return: A mapping from each keyword's ``value`` to the keyword.
    """
    table = {}
    for entry in values:
        keyword = entry if isinstance(entry, Keyword) else Keyword(entry)
        table[keyword.value] = keyword
    return table
+ """ + ), + ) + for arch_name, quad_tuple in arch2table.items(): + yield arch_name + cpu_wc = "any-" + quad_tuple.cpu_name + os_wc = quad_tuple.os_name + "-any" + if cpu_wc not in wildcards: + yield cpu_wc + wildcards.add(cpu_wc) + if os_wc not in wildcards: + yield os_wc + wildcards.add(os_wc) + # Add the remaining wildcards + + +@functools.lru_cache +def dpkg_arch_and_wildcards() -> FrozenSet[str]: + dpkg_arch_table = DpkgArchTable.load_arch_table() + return frozenset(all_architectures_and_wildcards(dpkg_arch_table._arch2table)) + + +class FieldValueClass(Enum): + SINGLE_VALUE = auto() + SPACE_SEPARATED_LIST = auto() + BUILD_PROFILES_LIST = auto() + COMMA_SEPARATED_LIST = auto() + COMMA_SEPARATED_EMAIL_LIST = auto() + FREE_TEXT_FIELD = auto() + DEP5_FILE_LIST = auto() + + +@dataclasses.dataclass(slots=True, frozen=True) +class Deb822KnownField: + name: str + field_value_class: FieldValueClass + warn_if_default: bool = True + replaced_by: Optional[str] = None + deprecated_with_no_replacement: bool = False + missing_field_severity: Optional[DiagnosticSeverity] = None + default_value: Optional[str] = None + known_values: Optional[Mapping[str, Keyword]] = None + unknown_value_diagnostic_severity: Optional[DiagnosticSeverity] = ( + DiagnosticSeverity.Error + ) + hover_text: Optional[str] = None + spellcheck_value: bool = False + is_stanza_name: bool = False + is_single_value_field: bool = True + + def field_diagnostics( + self, + kvpair: Deb822KeyValuePairElement, + stanza_position: "TEPosition", + position_codec: "LintCapablePositionCodec", + lines: List[str], + *, + field_name_typo_reported: bool = False, + ) -> Iterable[Diagnostic]: + field_name_token = kvpair.field_token + field_range_te = kvpair.range_in_parent().relative_to(stanza_position) + field_position_te = field_range_te.start_pos + yield from self._diagnostics_for_field_name( + field_name_token, + field_position_te, + field_name_typo_reported, + position_codec, + lines, + ) + if not 
self.spellcheck_value: + yield from self._known_value_diagnostics( + kvpair, field_position_te, position_codec, lines + ) + + def _diagnostics_for_field_name( + self, + token: Deb822FieldNameToken, + token_position: "TEPosition", + typo_detected: bool, + position_codec: "LintCapablePositionCodec", + lines: List[str], + ) -> Iterable[Diagnostic]: + field_name = token.text + # Defeat the case-insensitivity from python-debian + field_name_cased = str(field_name) + token_range_server_units = te_range_to_lsp( + TERange.from_position_and_size(token_position, token.size()) + ) + token_range = position_codec.range_to_client_units( + lines, + token_range_server_units, + ) + if self.deprecated_with_no_replacement: + yield Diagnostic( + token_range, + f"{field_name_cased} is deprecated and no longer used", + severity=DiagnosticSeverity.Warning, + source="debputy", + tags=[DiagnosticTag.Deprecated], + data=propose_remove_line_quick_fix(), + ) + elif self.replaced_by is not None: + yield Diagnostic( + token_range, + f"{field_name_cased} is a deprecated name for {self.replaced_by}", + severity=DiagnosticSeverity.Warning, + source="debputy", + tags=[DiagnosticTag.Deprecated], + data=propose_correct_text_quick_fix(self.replaced_by), + ) + + if not typo_detected and field_name_cased != self.name: + yield Diagnostic( + token_range, + f"Non-canonical spelling of {self.name}", + severity=DiagnosticSeverity.Information, + source="debputy", + data=propose_correct_text_quick_fix(self.name), + ) + + def _known_value_diagnostics( + self, + kvpair: Deb822KeyValuePairElement, + field_position_te: "TEPosition", + position_codec: "LintCapablePositionCodec", + lines: List[str], + ) -> Iterable[Diagnostic]: + unknown_value_severity = self.unknown_value_diagnostic_severity + allowed_values = self.known_values + if not allowed_values: + return + hint_text = None + values = kvpair.interpret_as(LIST_SPACE_SEPARATED_INTERPRETATION) + value_off = kvpair.value_element.position_in_parent().relative_to( 
+ field_position_te + ) + first_value = True + for value_ref in values.iter_value_references(): + value = value_ref.value + if ( + not first_value + and self.field_value_class == FieldValueClass.SINGLE_VALUE + ): + value_loc = value_ref.locatable + value_position_te = value_loc.position_in_parent().relative_to( + value_off + ) + value_range_in_server_units = te_range_to_lsp( + TERange.from_position_and_size(value_position_te, value_loc.size()) + ) + value_range = position_codec.range_to_client_units( + lines, + value_range_in_server_units, + ) + yield Diagnostic( + value_range, + f"The field {self.name} can only have exactly one value.", + severity=DiagnosticSeverity.Error, + source="debputy", + ) + # TODO: Add quickfix if the value is also invalid + continue + first_value = False + + known_value = self.known_values.get(value) + if known_value is None: + candidates = detect_possible_typo( + value, + self.known_values, + ) + if hint_text is None: + if len(self.known_values) < 5: + values = ", ".join(sorted(self.known_values)) + hint_text = f" Known values for this field: {values}" + else: + hint_text = "" + fix_data = None + severity = unknown_value_severity + fix_text = hint_text + if candidates: + match = candidates[0] + fix_text = f' It is possible that the value is a typo of "{match}".{fix_text}' + fix_data = [propose_correct_text_quick_fix(m) for m in candidates] + elif severity is None: + continue + if severity is None: + severity = DiagnosticSeverity.Warning + message = fix_text + else: + message = f'The value "{value}" is not supported in {self.name}.{fix_text}' + elif known_value.is_obsolete: + replacement = known_value.replaced_by + if replacement is not None: + message = f'The value "{value}" has been replaced by {replacement}' + severity = DiagnosticSeverity.Warning + fix_data = [propose_correct_text_quick_fix(replacement)] + else: + message = ( + f'The value "{value}" is obsolete without a single replacement' + ) + severity = DiagnosticSeverity.Warning 
+ fix_data = None + else: + # All good + continue + + value_loc = value_ref.locatable + value_position_te = value_loc.position_in_parent().relative_to(value_off) + value_range_in_server_units = te_range_to_lsp( + TERange.from_position_and_size(value_position_te, value_loc.size()) + ) + value_range = position_codec.range_to_client_units( + lines, + value_range_in_server_units, + ) + yield Diagnostic( + value_range, + message, + severity=severity, + source="debputy", + data=fix_data, + ) + + +@dataclasses.dataclass(slots=True, frozen=True) +class DctrlKnownField(Deb822KnownField): + inherits_from_source: bool = False + + +SOURCE_FIELDS = _fields( + DctrlKnownField( + "Source", + FieldValueClass.SINGLE_VALUE, + missing_field_severity=DiagnosticSeverity.Error, + is_stanza_name=True, + hover_text=textwrap.dedent( + """\ + Declares the name of the source package. + + Note this must match the name in the first entry of debian/changelog file. + """ + ), + ), + DctrlKnownField( + "Standards-Version", + FieldValueClass.SINGLE_VALUE, + missing_field_severity=DiagnosticSeverity.Error, + hover_text=textwrap.dedent( + """\ + Declares the last semantic version of the Debian Policy this package as last checked against. + + **Example*: + ``` + Standards-Version: 4.5.2 + ``` + + Note that the last version part of the full Policy version (the **.X** in 4.5.2**.X**) is + typically omitted as it is used solely for editorial changes to the policy (e.g. typo fixes). + """ + ), + ), + DctrlKnownField( + "Section", + FieldValueClass.SINGLE_VALUE, + known_values=ALL_SECTIONS, + unknown_value_diagnostic_severity=DiagnosticSeverity.Warning, + hover_text=textwrap.dedent( + """\ + Define the default section for packages in this source package. + + Example: + ``` + Section: devel + ``` + + Please see https://packages.debian.org/unstable for more details about the sections. 
+ """ + ), + ), + DctrlKnownField( + "Priority", + FieldValueClass.SINGLE_VALUE, + default_value="optional", + warn_if_default=False, + known_values=_allowed_values( + Keyword( + "required", + hover_text=textwrap.dedent( + """\ + The package is necessary for the proper functioning of the system (read: dpkg needs it). + + Applicable if dpkg *needs* this package to function and it is not a library. + + No two packages that both have a priority of *standard* or higher may conflict with + each other. + """ + ), + ), + Keyword( + "important", + hover_text=textwrap.dedent( + """\ + The *important* packages are a bare minimum of commonly-expected and necessary tools. + + Applicable if 99% of all users in the distribution needs this package and it is not a library. + + No two packages that both have a priority of *standard* or higher may conflict with + each other. + """ + ), + ), + Keyword( + "standard", + hover_text=textwrap.dedent( + """\ + These packages provide a reasonable small but not too limited character-mode system. This is + what will be installed by default (by the debian-installer) if the user does not select anything + else. This does not include many large applications. + + Applicable if your distribution installer will install this package by default on a new system + and it is not a library. + + No two packages that both have a priority of *standard* or higher may conflict with + each other. + """ + ), + ), + Keyword( + "optional", + hover_text="This is the default priority and used by the majority of all packages" + " in the Debian archive", + ), + Keyword( + "extra", + is_obsolete=True, + replaced_by="optional", + hover_text="Obsolete alias of `optional`.", + ), + ), + hover_text=textwrap.dedent( + """\ + Define the default priority for packages in this source package. + + The priority field describes how important the package is for the functionality of the system. 
+ + Example: + ``` + Priority: optional + ``` + + Unless you know you need a different value, you should choose <b>optional</b> for your packages. + """ + ), + ), + DctrlKnownField( + "Maintainer", + FieldValueClass.SINGLE_VALUE, + missing_field_severity=DiagnosticSeverity.Error, + hover_text=textwrap.dedent( + """\ + The maintainer of the package. + + **Example**: + ``` + Maintainer: Jane Contributor <jane@janes.email-provider.org> + ``` + + Note: If a person is listed in the Maintainer field, they should *not* be listed in Uploaders field. + """ + ), + ), + DctrlKnownField( + "Uploaders", + FieldValueClass.COMMA_SEPARATED_EMAIL_LIST, + hover_text=textwrap.dedent( + """\ + Comma separated list of uploaders associated with the package. + + **Example**: + ``` + Uploaders: + John Doe <john@doe.org>, + Lisbeth Worker <lis@worker.org>, + ``` + + Formally uploaders are considered co-maintainers for the package with the party listed in the + **Maintainer** field being the primary maintainer. In practice, each maintainer or maintenance + team can have their own ruleset about the difference between the **Maintainer** and the + **Uploaders**. As an example, the Python packaging team has a different rule set for how to + react to a package depending on whether the packaging team is the **Maintainer** or in the + **Uploaders** field. + + Note: If a person is listed in the Maintainer field, they should *not* be listed in Uploaders field. + """ + ), + ), + DctrlKnownField( + "Vcs-Browser", + FieldValueClass.SINGLE_VALUE, + hover_text=textwrap.dedent( + """\ + URL to the Version control system repo used for the packaging. The URL should be usable with a + browser *without* requiring any login. + + This should be used together with one of the other **Vcs-** fields. + """ + ), + ), + DctrlKnownField( + "Vcs-Git", + FieldValueClass.SPACE_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + URL to the git repo used for the packaging. 
The URL should be usable with `git clone` + *without* requiring any login. + + This should be used together with the **Vcs-Browser** field provided there is a web UI for the repo. + + Note it is possible to specify a branch via the `-b` option. + + ``` + Vcs-Git: https://salsa.debian.org/some/packaging-repo -b debian/unstable + ``` + """ + ), + ), + DctrlKnownField( + "Vcs-Svn", + FieldValueClass.SPACE_SEPARATED_LIST, # TODO: Might be a single value + hover_text=textwrap.dedent( + """\ + URL to the Subversion repo used for the packaging. The URL should be usable with `svn checkout` + *without* requiring any login. + + This should be used together with the **Vcs-Browser** field provided there is a web UI for the repo. + """ + ), + ), + DctrlKnownField( + "Vcs-Arch", + FieldValueClass.SPACE_SEPARATED_LIST, # TODO: Might be a single value + hover_text=textwrap.dedent( + """\ + URL to the GNU Arch repo used for the packaging. The URL should be usable for getting a copy of the + sources *without* requiring any login. + + This should be used together with the **Vcs-Browser** field provided there is a web UI for the repo. + """ + ), + ), + DctrlKnownField( + "Vcs-Cvs", + FieldValueClass.SPACE_SEPARATED_LIST, # TODO: Might be a single value + hover_text=textwrap.dedent( + """\ + URL to the CVS repo used for the packaging. The URL should be usable for getting a copy of the + sources *without* requiring any login. + + This should be used together with the **Vcs-Browser** field provided there is a web UI for the repo. + """ + ), + ), + DctrlKnownField( + "Vcs-Darcs", + FieldValueClass.SPACE_SEPARATED_LIST, # TODO: Might be a single value + hover_text=textwrap.dedent( + """\ + URL to the Darcs repo used for the packaging. The URL should be usable for getting a copy of the + sources *without* requiring any login. + + This should be used together with the **Vcs-Browser** field provided there is a web UI for the repo.
+ """ + ), + ), + DctrlKnownField( + "Vcs-Hg", + FieldValueClass.SPACE_SEPARATED_LIST, # TODO: Might be a single value + hover_text=textwrap.dedent( + """\ + URL to the Mercurial repo used for the packaging. The URL should be usable for getting a copy of the + sources *without* requiring any login. + + This should be used together with the **Vcs-Browser** field provided there is a web UI for the repo. + """ + ), + ), + DctrlKnownField( + "Vcs-Mtn", + FieldValueClass.SPACE_SEPARATED_LIST, # TODO: Might be a single value + hover_text=textwrap.dedent( + """\ + URL to the Monotone repo used for the packaging. The URL should be usable for getting a copy of the + sources *without* requiring any login. + + This should be used together with the **Vcs-Browser** field provided there is a web UI for the repo. + """ + ), + ), + DctrlKnownField( + "DM-Upload-Allowed", + FieldValueClass.SINGLE_VALUE, + deprecated_with_no_replacement=True, + default_value="no", + known_values=_allowed_values("yes", "no"), + hover_text=textwrap.dedent( + """\ + Obsolete field + + It was used to enable Debian Maintainers to upload the package without requiring a Debian Developer + to sign the package. This mechanism has been replaced by a new authorization mechanism. + + Please see https://lists.debian.org/debian-devel-announce/2012/09/msg00008.html for details about the + replacement. + """ + ), + ), + DctrlKnownField( + "Build-Depends", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + All minimum build-dependencies for this source package. Needed for any target including **clean**. + """ + ), + ), + DctrlKnownField( + "Build-Depends-Arch", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + Build-dependencies required for building the architecture dependent binary packages of this source + package.
+ + These build-dependencies must be satisfied when executing the **build-arch** and **binary-arch** + targets either directly or indirectly in addition to those listed in **Build-Depends**. + + Note that these dependencies are <em>not</em> available during **clean**. + """ + ), + ), + DctrlKnownField( + "Build-Depends-Indep", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + Build-dependencies required for building the architecture independent binary packages of this source + package. + + These build-dependencies must be satisfied when executing the **build-indep** and **binary-indep** + targets either directly or indirectly in addition to those listed in **Build-Depends**. + + Note that these dependencies are <em>not</em> available during **clean**. + """ + ), + ), + DctrlKnownField( + "Build-Conflicts", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + Packages that must **not** be installed during **any** part of the build, including the **clean** + target **clean**. + + Where possible, it is often better to configure the build so that it does not react to the package + being present in the first place. Usually this is a question of using a `--without-foo` or + `--disable-foo` or such to the build configuration. + """ + ), + ), + DctrlKnownField( + "Build-Conflicts-Arch", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + Packages that must **not** be installed during the **build-arch** or **binary-arch** targets. + This also applies when these targets are run implicitly such as via the **binary** target. + + Where possible, it is often better to configure the build so that it does not react to the package + being present in the first place. Usually this is a question of using a `--without-foo` or + `--disable-foo` or such to the build configuration. 
+ """ + ), + ), + DctrlKnownField( + "Build-Conflicts-Indep", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + Packages that must **not** be installed during the **build-indep** or **binary-indep** targets. + This also applies when these targets are run implicitly such as via the **binary** target. + + Where possible, it is often better to configure the build so that it does not react to the package + being present in the first place. Usually this is a question of using a `--without-foo` or + `--disable-foo` or such to the build configuration. + """ + ), + ), + DctrlKnownField( + "Testsuite", + FieldValueClass.SPACE_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + Declares that this package provides or should run install time tests via `autopkgtest`. + + This field can be used to request an automatically generated autopkgtests via the **autodep8** package. + Please refer to the documentation of the **autodep8** package for which values you can put into + this field and what kind of testsuite the keywords will provide. + + Declaring this field in *debian/control* is only necessary when you want additional tests beyond + those in *debian/tests/control* as **dpkg** automatically records the package provided ones from + *debian/tests/control*. + """ + ), + ), + DctrlKnownField( + "Homepage", + FieldValueClass.SINGLE_VALUE, + hover_text=textwrap.dedent( + """\ + Link to the upstream homepage for this source package. + + **Example**: + ``` + Homepage: https://www.janes-tools.org/frob-cleaner + ``` + """ + ), + ), + DctrlKnownField( + "Rules-Requires-Root", + FieldValueClass.SPACE_SEPARATED_LIST, + unknown_value_diagnostic_severity=None, + known_values=_allowed_values( + Keyword( + "no", + hover_text=textwrap.dedent( + """\ + The build process will not require root or fakeroot during any step. This enables + dpkg-buildpackage and debhelper to perform several optimizations during the build. 
+ + This is the default with dpkg-build-api at version 1 or later. + """ + ), + ), + Keyword( + "binary-targets", + hover_text=textwrap.dedent( + """\ + The build process assumes that dpkg-buildpackage will run the relevant binary + target with root or fakeroot. This was the historical default behaviour. + + This is the default with dpkg-build-api at version 0. + """ + ), + ), + ), + hover_text=textwrap.dedent( + """\ + Declare if and when the package build assumes it is run as root or fakeroot. + + Most packages do not need to run as root or fakeroot and the legacy behaviour comes with a + performance cost. This field can be used to explicitly declare that the legacy behaviour is + unnecessary. + + **Example:** + ``` + Rules-Requires-Root: no + ``` + + Setting this field to `no` *can* cause the package to stop building if it requires root. + Depending on the situation, it might require some trivial or some complicated changes to fix that. + If it breaks and you cannot figure out how to fix it, then reset the field to `binary-targets` + and move on until you have time to fix it. + + The default value for this field depends on the ``dpkg-build-api`` version. If the package + ``Build-Depends`` on ``dpkg-build-api (>= 1)`` or later, the default is ``no``. Otherwise, + the default is ``binary-targets`` + + Note it is **not** possible to require running the package as "true root". + """ + ), + ), + DctrlKnownField( + "Bugs", + FieldValueClass.SINGLE_VALUE, + hover_text=textwrap.dedent( + """\ + Provide a custom bug tracker URL + + This field is *not* used by packages uploaded to Debian or most derivatives as the distro tooling + has a default bugtracker built-in. It is primarily useful for third-party provided packages such + that bug reporting tooling can redirect the user to their bug tracker. + """ + ), + ), + DctrlKnownField( + "Origin", + FieldValueClass.SINGLE_VALUE, + hover_text=textwrap.dedent( + """\ + Declare the origin of the package.
+ + This field is *not* used by packages uploaded to Debian or most derivatives as the origin would + be the distribution. It is primarily useful for third-party provided packages as some tools will + detect this field. + """ + ), + ), + DctrlKnownField( + "X-Python-Version", + FieldValueClass.COMMA_SEPARATED_LIST, + replaced_by="X-Python3-Version", + hover_text=textwrap.dedent( + """\ + Obsolete field for declaring the supported Python2 versions + + Since Python2 is no longer supported, this field is now redundant. For Python3, the field is + called **X-Python3-Version**. + """ + ), + ), + DctrlKnownField( + "X-Python3-Version", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + # Too lazy to provide a better description + """\ + For declaring the supported Python3 versions + + This is used by the tools from `dh-python` package. Please see the documentation of that package + for when and how to use it. + """ + ), + ), + DctrlKnownField( + "XS-Autobuild", + FieldValueClass.SINGLE_VALUE, + known_values=_allowed_values("yes"), + hover_text=textwrap.dedent( + """\ + Used for non-free packages to denote that they may be auto-build on the Debian build infrastructure + + Note that adding this field **must** be combined with following the instructions at + https://www.debian.org/doc/manuals/developers-reference/pkgs.html#non-free-buildd + """ + ), + ), + DctrlKnownField( + "Description", + FieldValueClass.FREE_TEXT_FIELD, + spellcheck_value=True, + hover_text=textwrap.dedent( + """\ + This field contains a human-readable description of the package. However, it is not used directly. + + Binary packages can reference parts of it via the `${source:Synopsis}` and the + `${source:Extended-Description}` substvars. Without any of these substvars, the `Description` field + of the `Source` stanza remains unused. 
+ + The first line immediately after the field is called the *Synopsis* and is a short "noun-phrase" + intended to provide a one-line summary of a package. The lines after the **Synopsis** is known + as the **Extended Description** and is intended as a longer summary of a package. + + **Example:** + ``` + Description: documentation generator for Python projects + Sphinx is a tool for producing documentation for Python projects, using + reStructuredText as markup language. + . + Sphinx features: + * HTML, CHM, LaTeX output, + * Cross-referencing source code, + * Automatic indices, + * Code highlighting, using Pygments, + * Extensibility. Existing extensions: + - automatic testing of code snippets, + - including docstrings from Python modules. + . + Build-depend on sphinx if your package uses /usr/bin/sphinx-* + executables. Build-depend on python3-sphinx if your package uses + the Python API (for instance by calling python3 -m sphinx). + ``` + + The **Synopsis** is usually displayed in cases where there is limited space such as when reviewing + the search results from `apt search foo`. It is often a good idea to imagine that the **Synopsis** + part is inserted into a sentence like "The package provides {{Synopsis-goes-here}}". The + **Extended Description** is a standalone description that should describe what the package does and + how it relates to the rest of the system (in terms of, for example, which subsystem it is which part of). + Please see https://www.debian.org/doc/debian-policy/ch-controlfields.html#description for more details + about the description field and suggestions for how to write it. 
+ """ + ), + ), +) + +BINARY_FIELDS = _fields( + DctrlKnownField( + "Package", + FieldValueClass.SINGLE_VALUE, + is_stanza_name=True, + missing_field_severity=DiagnosticSeverity.Error, + hover_text="Declares the name of a binary package", + ), + DctrlKnownField( + "Package-Type", + FieldValueClass.SINGLE_VALUE, + default_value="deb", + known_values=_allowed_values( + Keyword("deb", hover_text="The package will be built as a regular deb."), + Keyword( + "udeb", + hover_text="The package will be built as a micro-deb (also known as a udeb). These are solely used by the debian-installer.", + ), + ), + hover_text=textwrap.dedent( + """\ + **Special-purpose only**. *This field is a special purpose field and is rarely needed.* + *You are recommended to omit unless you know you need it or someone told you to use it.* + + Determines the type of package. This field can be used to declare that a given package is a different + type of package than usual. The primary case where this is known to be useful is for building + micro-debs ("udeb") to be consumed by the debian-installer. + """ + ), + ), + DctrlKnownField( + "Architecture", + FieldValueClass.SPACE_SEPARATED_LIST, + missing_field_severity=DiagnosticSeverity.Error, + unknown_value_diagnostic_severity=None, + known_values=_allowed_values(*dpkg_arch_and_wildcards()), + hover_text=textwrap.dedent( + """\ + Determines which architectures this package can be compiled for or if it is an architecture-independent + package. The value is a space-separated list of dpkg architecture names or wildcards. + + **Example**: + ``` + Package: architecture-specific-package + Architecture: any + # ... + + + Package: data-only-package + Architecture: all + Multi-Arch: foreign + # ... + + + Package: linux-only-package + Architecture: linux-any + # ... + ``` + + When in doubt, stick to the values **all** (for scripts, data or documentation, etc.) or **any** + (for anything that can be compiled). 
For official Debian packages, it is often easier to attempt the + compilation for unsupported architectures than to maintain the list of machine architectures that work. + """ + ), + ), + DctrlKnownField( + "Essential", + FieldValueClass.SINGLE_VALUE, + default_value="no", + known_values=_allowed_values( + Keyword( + "yes", + hover_text="The package is essential and uninstalling it will completely and utterly break the" + " system beyond repair.", + ), + Keyword( + "no", + hover_text=textwrap.dedent( + """\ + The package is a regular package. This is the default and recommended.</p> + + Note that declaring a package to be "Essential: no" is the same as not having the field except omitting + the field wastes fewer bytes on everyone's hard disk. + """ + ), + ), + ), + hover_text=textwrap.dedent( + """\ + **Special-purpose only**. *This field is a special purpose field and is rarely needed.* + *You are recommended to omit unless you know you need it or someone told you to use it.* + + Whether the package should be considered Essential as defined by Debian Policy. + + Essential packages are subject to several distinct but very important rules: + + * Essential packages are considered essential for the system to work. The packaging system + (APT and dpkg) will refuse to uninstall it without some very insisting force options and warnings. + + * Other packages are not required to declare explicit dependencies on essential packages as a + side-effect of the above except as to ensure a that the given essential package is upgraded + to a given minimum version. + + * Once installed, essential packages function must at all time no matter where dpkg is in its + installation or upgrade process. During bootstrapping or installation, this requirement is + relaxed. 
+ """ + ), + ), + DctrlKnownField( + "XB-Important", + FieldValueClass.SINGLE_VALUE, + replaced_by="Protected", + default_value="no", + known_values=_allowed_values( + Keyword( + "yes", + hover_text="The package is protected and attempts to uninstall it will cause strong warnings to the" + " user that they might be breaking the system.", + ), + Keyword( + "no", + hover_text=textwrap.dedent( + """\ + The package is a regular package. This is the default and recommended.</p> + + Note that declaring a package to be `XB-Important: no` is the same as not having the field + except omitting the field wastes fewer bytes on everyone's hard-disk. + """ + ), + ), + ), + ), + DctrlKnownField( + "Protected", + FieldValueClass.SINGLE_VALUE, + default_value="no", + known_values=_allowed_values( + Keyword( + "yes", + hover_text="The package is protected and attempts to uninstall it will cause strong warnings to the" + " user that they might be breaking the system.", + ), + Keyword( + "no", + hover_text=textwrap.dedent( + """\ + The package is a regular package. This is the default and recommended.</p> + + Note that declaring a package to be `Protected: no` is the same as not having the field + except omitting the field wastes fewer bytes on everyone's hard-disk. + """ + ), + ), + ), + ), + DctrlKnownField( + "Pre-Depends", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + **Advanced field**. *This field covers an advanced topic. If you are new to packaging, you are* + *probably not looking for this field (except to set a **${misc:Pre-Depends}** relation. Incorrect use* + *of this field can cause issues - among other causing issues during upgrades that users cannot work* + *around without passing `--force-*` options to dpkg.* + + This field is like *Depends*, except that is also forces dpkg to complete installation of the packages + named before even starting the installation of the package which declares the pre-dependency. 
+ + **Example**: + ``` + Pre-Depends: ${misc:Pre-Depends} + ``` + + Note this is a very strong dependency and not all packages support being a pre-dependency because it + puts additional requirements on the package being depended on. Use of **${misc:Pre-Depends}** is + pre-approved and recommended. Essential packages are known to support being in **Pre-Depends**. + However, careless use of **Pre-Depends** for essential packages can still cause dependency resolvers + problems. + """ + ), + ), + DctrlKnownField( + "Depends", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + Lists the packages that must be installed, before this package is installed. + + **Example**: + ``` + Package: foo + Architecture: any + Depends: ${misc:Depends}, + ${shlibs:Depends}, + libfoo1 (= ${binary:Version}), + foo-data (= ${source:Version}), + ``` + + This field declares an absolute dependency. Before installing the package, **dpkg** will require + all dependencies to be in state `configured` first. Though, if there is a circular dependency between + two or more packages, **dpkg** will break that circle at an arbitrary point where necessary based on + built-in heuristics. + + This field should be used if the depended-on package is required for the depending package to provide a + *significant amount of functionality* or when it is used in the **postinst** or **prerm** maintainer + scripts. + """ + ), + ), + DctrlKnownField( + "Recommends", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + Lists the packages that *should* be installed when this package is installed in all but + *unusual installations*.</p> + + **Example**: + ``` + Recommends: foo-optional + ``` + + By default, APT will attempt to install recommends unless they cannot be installed or the user + has configured APT skip recommends. Notably, during automated package builds for the Debian + archive, **Recommends** are **not** installed. 
+ + As implied, the package must have some core functionality that works **without** the + **Recommends** being satisfied as they are not guaranteed to be there. If the package cannot + provide any functionality without a given package, that package should be in **Depends**. + """ + ), + ), + DctrlKnownField( + "Suggests", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + Lists the packages that may make this package more useful but not installing them is perfectly + reasonable as well. Suggests can also be useful for add-ons that only make sense in particular + corner cases like supporting a non-standard file format. + + **Example**: + ``` + Suggests: bar + ``` + """ + ), + ), + DctrlKnownField( + "Enhances", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + This field is similar to Suggests but works in the opposite direction. It is used to declare that + this package can enhance the functionality of another package. + + **Example**: + ``` + Package: foo + Provide: debputy-plugin-foo + Enhances: debputy + ``` + """ + ), + ), + DctrlKnownField( + "Provides", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + Declare this package also provide one or more other packages. This means that this package can + substitute for the provided package in some relations. + + *Example*: + ``` + Package: foo + ... + + Package: foo-plus + Provides: foo (= ${source:Upstream-Version}) + ``` + + If the provides relation is versioned, it must use a "strictly equals" version. If it does not + declare a version, then it *cannot* be used to satisfy a dependency with a version restriction. 
+ Consider the following example: + + **Archive scenario*: (This is *not* a debian/control file, despite the resemblance) + ``` + Package foo + Depends: bar (>= 1.0) + + Package: bar + Version: 0.9 + + Package: bar-plus + Provides: bar (= 1.0) + + Package: bar-clone + Provides: bar + ``` + + In this archive scenario, the `bar-plus` package will satisfy the dependency of `foo` as the + only one. The `bar` package fails because the version is only *0.9* and `bar-clone` because + the provides is unversioned, but the dependency clause is versioned. + """ + ), + ), + DctrlKnownField( + "Conflicts", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + **Warning**: *You may be looking for Breaks instead of Conflicts*. + + This package cannot be installed together with the packages listed in the Conflicts field. This + is a *bigger hammer* than **Breaks** and is used sparingly. Notably, if you want to do a versioned + **Conflicts** then you *almost certainly* want **Breaks** instead. + + **Example**: + ``` + Conflicts: bar + ``` + + Please check the description of the **Breaks** field for when you would use **Breaks** vs. + **Conflicts**. + + Note if a package conflicts with itself (indirectly or via **Provides**), then it is using a + special rule for **Conflicts**. See section + 7.6.2 "[Replacing whole packages, forcing their removal]" in the Debian Policy Manual. + + [Replacing whole packages, forcing their removal]: https://www.debian.org/doc/debian-policy/ch-relationships.html#replacing-whole-packages-forcing-their-removal + """ + ), + ), + DctrlKnownField( + "Breaks", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + This package cannot be installed together with the packages listed in the `Breaks` field. + + This is often use to declare versioned issues such as "This package does not work with foo if + it is version 1.0 or less". 
In comparison, `Conflicts` is generally used to declare that + "This package does not work at all as long as foo is installed". + + **Example**: + ``` + Breaks: bar (<= 1.0~) + ```` + + **Breaks vs. Conflicts**: + + * I moved files from **foo** to **bar** in version X, what should I do? + + Add `Breaks: foo (<< X~)` + `Replaces: foo (<< X~)` to **bar** + + * Upgrading **bar** while **foo** is version X or less causes problems **foo** or **bar** to break. + How do I solve this? + + Add `Breaks: foo (<< X~)` to **bar** + + * The **foo** and **bar** packages provide the same functionality (interface) but different + implementations and there can be at most one of them. What should I do? + + See section 7.6.2 [Replacing whole packages, forcing their removal] in the Debian Policy Manual. + + * How to handle when **foo** and **bar** packages are unrelated but happen to provide the same binary? + + Attempt to resolve the name conflict by renaming the clashing files in question on either (or both) sides. + + Note the use of *~* in version numbers in the answers are generally used to ensure this works correctly in + case of a backports (in the Debian archive), where the package is rebuilt with the "~bpo" suffix in its + version. + + [Replacing whole packages, forcing their removal]: https://www.debian.org/doc/debian-policy/ch-relationships.html#replacing-whole-packages-forcing-their-removal + """ + ), + ), + DctrlKnownField( + "Replaces", + FieldValueClass.COMMA_SEPARATED_LIST, + hover_text=textwrap.dedent( + """\ + This package either replaces another package or overwrites files that used to be provided by + another package. + + **Attention**: The `Replaces` field is **always** used with either `Breaks` or `Conflicts` field. + + **Example**: + ``` + Package: foo + ... 
+ + # The foo package was split to move data files into foo-data in version 1.2-3 + Package: foo-data + Replaces: foo (<< 1.2-3~) + Breaks: foo (<< 1.2-3~) + ``` + + Please check the description of the `Breaks` field for when you would use `Breaks` vs. `Conflicts`. + It also covers common uses of `Replaces`. + """ + ), + ), + DctrlKnownField( + "Build-Profiles", + FieldValueClass.BUILD_PROFILES_LIST, + hover_text=textwrap.dedent( + """\ + **Advanced field**. *This field covers an advanced topic. If you are new to packaging, you are* + *advised to leave it at its default until you have a working basic package or lots of time to understand* + *this topic.* + + Declare that the package will only be built when the given build-profiles are satisfied. + + This field is primarily used in combination with build profiles inside the build dependency related fields + to reduce the number of build dependencies required during bootstrapping of a new architecture. + + **Example**: + ``` + Package: foo + ... + + Package: foo-udeb + Package-Type: udeb + # Skip building foo-udeb when the build profile "noudeb" is set (e.g., via dpkg-buildpackage -Pnoudeb) + Build-Profiles: <!noudeb> + ``` + + Note that there is an official list of "common" build profiles with predefined purposes along with rules + for how and when they can be used. This list can be found at + https://wiki.debian.org/BuildProfileSpec#Registered_profile_names. + """ + ), + ), + DctrlKnownField( + "Section", + FieldValueClass.SINGLE_VALUE, + missing_field_severity=DiagnosticSeverity.Error, + inherits_from_source=True, + known_values=ALL_SECTIONS, + unknown_value_diagnostic_severity=DiagnosticSeverity.Warning, + hover_text=textwrap.dedent( + """\ + Define the section for this package. + + Example: + ``` + Section: devel + ``` + + Please see https://packages.debian.org/unstable for more details about the sections.
+ """ + ), + ), + DctrlKnownField( + "Priority", + FieldValueClass.SINGLE_VALUE, + default_value="optional", + warn_if_default=False, + missing_field_severity=DiagnosticSeverity.Error, + inherits_from_source=True, + known_values=_allowed_values( + Keyword( + "required", + hover_text=textwrap.dedent( + """\ + The package is necessary for the proper functioning of the system (read: dpkg needs it). + + Applicable if dpkg *needs* this package to function and it is not a library. + + No two packages that both have a priority of *standard* or higher may conflict with + each other. + """ + ), + ), + Keyword( + "important", + hover_text=textwrap.dedent( + """\ + The *important* packages are a bare minimum of commonly-expected and necessary tools. + + Applicable if 99% of all users in the distribution needs this package and it is not a library. + + No two packages that both have a priority of *standard* or higher may conflict with + each other. + """ + ), + ), + Keyword( + "standard", + hover_text=textwrap.dedent( + """\ + These packages provide a reasonable small but not too limited character-mode system. This is + what will be installed by default (by the debian-installer) if the user does not select anything + else. This does not include many large applications. + + Applicable if your distribution installer will install this package by default on a new system + and it is not a library. + + No two packages that both have a priority of *standard* or higher may conflict with + each other. + """ + ), + ), + Keyword( + "optional", + hover_text="This is the default priority and used by the majority of all packages" + " in the Debian archive", + ), + Keyword( + "extra", + is_obsolete=True, + replaced_by="optional", + hover_text="Obsolete alias of `optional`.", + ), + ), + hover_text=textwrap.dedent( + """\ + Define the priority this package. + + The priority field describes how important the package is for the functionality of the system. 
+ + Example: + ``` + Priority: optional + ``` + + Unless you know you need a different value, you should choose <b>optional</b> for your packages. + """ + ), + ), + DctrlKnownField( + "Multi-Arch", + FieldValueClass.SINGLE_VALUE, + # Explicit "no" tends to be used as "someone reviewed this and concluded no", so we do + # not warn about it being explicitly "no". + warn_if_default=False, + default_value="no", + known_values=_allowed_values( + Keyword( + "no", + hover_text=textwrap.dedent( + """\ + The default. The package can be installed for at most one architecture at the time. It can + *only* satisfy relations for the same architecture as itself. Note that `Architecture: all` + packages are considered as a part of the system's "primary" architecture (see output of + `dpkg --print-architecture`). + + Note: Despite the "no", the package *can* be installed for a foreign architecture (as an example, + you can install a 32-bit version of a package on a 64-bit system). However, packages depending + on it must also be installed for the foreign architecture. + """ + ), + ), + Keyword( + "foreign", + hover_text=textwrap.dedent( + """\ + The package can be installed for at most one architecture at the time. However, it can + satisfy relations for packages regardless of their architecture. This is often useful for packages + solely providing data or binaries that have "Multi-Arch neutral interfaces". + + Sadly, describing a "Multi-Arch neutral interface" is hard and often only done by Multi-Arch + experts on a case-by-case basis. Some programs and scripts have "Multi-Arch dependent interfaces" + and are not safe to declare as `Multi-Arch: foreign`. + + The name "foreign" refers to the fact that the package can satisfy relations for native + *and foreign* architectures at the same time. + """ + ), + ), + Keyword( + "same", + hover_text=textwrap.dedent( + """\ + The same version of the package can be co-installed for multiple architecture. 
However, + for this to work, the package *must* ship all files in architecture unique paths (usually + beneath `/usr/lib/<DEB_HOST_MULTIARCH>`) or have bit-for-bit identical content + in files that are in non-architecture unique paths (such as files beneath `/usr/share/doc`). + + The name `same` refers to the fact that the package can satisfy relations only for the `same` + architecture as itself. However, in this case, it is co-installable with itself as noted above. + Note: This value **cannot** be used with `Architecture: all`. + """ + ), + ), + Keyword( + "allowed", + hover_text=textwrap.dedent( + """\ + **Advanced value**. The package is *not* co-installable with itself but can satisfy Multi-Arch + foreign and Multi-Arch same relations at the same. This is useful for implementations of + scripting languages (such as Perl or Python). Here the interpreter contextually need to + satisfy some relations as `Multi-Arch: foreign` and others as `Multi-Arch: same`. + + Typically, native extensions or plugins will need a `Multi-Arch: same`-relation as they only + work with the interpreter compiled for the same machine architecture as themselves whereas + scripts are usually less picky and can rely on the `Multi-Arch: foreign` relation. Packages + wanting to rely on the "Multi-Arch: foreign" interface must explicitly declare this adding a + `:any` suffix to the package name in the dependency relation (e.g. `Depends: python3:any`). + However, the `:any"`suffix cannot be used unconditionally and should not be used unless you + know you need it. + """ + ), + ), + ), + hover_text=textwrap.dedent( + """\ + **Advanced field**. *This field covers an advanced topic. If you are new to packaging, you are* + *advised to leave it at its default until you have a working basic package or lots of time to understand* + *this topic.* + + This field is used to declare the Multi-Arch interface of the package. 
+ + The `Multi-Arch` field is used to inform the installation system (APT and dpkg) about how it should handle + dependency relations involving this package and foreign architectures. This is useful for multiple purposes + such as cross-building without emulation and installing 32-bit packages on a 64-bit system. The latter is + often done to use legacy apps or old games that was never ported to 64-bit machines. + + **Example**: + ``` + Multi-Arch: foreign + ``` + + The rules for `Multi-Arch` can be quite complicated, but in many cases the following simple rules of thumb + gets you a long way: + + * If the [Multi-Arch hinter] comes with a hint, then it almost certainly correct. You are recommended + to check the hint for further details (some changes can be complicated to do). Note that the + Multi-Arch hinter is only run for official Debian packages and may not be applicable to your case. + + * If you have an `Architecture: all` data-only package, then it often want to be `Multi-Arch: foreign` + + * If you have an architecture dependent package, where everything is installed in + `/usr/lib/${DEB_HOST_MULTIARCH}` (plus a bit of standard documentation in `/usr/share/doc`), then + you *probably* want `Multi-Arch: same` + + * If none of the above applies, then omit the field unless you know what you are doing or you are + receiving advice from a Multi-Arch expert. + + + There are 4 possible values for the Multi-Arch field, though not all values are applicable to all packages: + + + * `no` - The default. The package can be installed for at most one architecture at the time. It can + *only* satisfy relations for the same architecture as itself. Note that `Architecture: all` packages + are considered as a part of the system's "primary" architecture (see output of `dpkg --print-architecture`). + + Use of an explicit `no` over omitting the field is commonly done to signal that someone took the + effort to understand the situation and concluded `no` was the right answer. 
+ + Note: Despite the `no`, the package *can* be installed for a foreign architecture (e.g. you can + install a 32-bit version of a package on a 64-bit system). However, packages depending on it must also + be installed for the foreign architecture. + + + * `foreign` - The package can be installed for at most one architecture at the time. However, it can + satisfy relations for packages regardless of their architecture. This is often useful for packages + solely providing data or binaries that have "Multi-Arch neutral interfaces". Sadly, describing + a "Multi-Arch neutral interface" is hard and often only done by Multi-Arch experts on a case-by-case + basis. Among other, scripts despite being the same on all architectures can still have a "non-neutral" + "Multi-Arch" interface if their output is architecture dependent or if they dependencies force them + out of the `foreign` role. The dependency issue usually happens when depending indirectly on an + `Multi-Arch: allowed` package. + + Some programs are have "Multi-Arch dependent interfaces" and are not safe to declare as + `Multi-Arch: foreign`. The name `foreign` refers to the fact that the package can satisfy relations + for native *and foreign* architectures at the same time. + + + * `same` - The same version of the package can be co-installed for multiple architecture. However, + for this to work, the package **must** ship all files in architecture unique paths (usually + beneath `/usr/lib/${DEB_HOST_MULTIARCH}`) **or** have bit-for-bit identical content in files + that are in non-architecture unique paths (e.g. `/usr/share/doc`). Note that these packages + typically do not contain configuration files or **dpkg** `conffile`s. + + The name `same` refers to the fact that the package can satisfy relations only for the "same" + architecture as itself. However, in this case, it is co-installable with itself as noted above. + + Note: This value **cannot** be used with `Architecture: all`. 
+ + + * `allowed` - **Advanced value**. This value is for a complex use-case that most people does not + need. Consider it only if none of the other values seem to do the trick. + + The package is **NOT** co-installable with itself but can satisfy Multi-Arch foreign and Multi-Arch same + relations at the same. This is useful for implementations of scripting languages (e.g. Perl or Python). + Here the interpreter contextually need to satisfy some relations as `Multi-Arch: foreign` and others as + `Multi-Arch: same` (or `Multi-Arch: no`). + + Typically, native extensions or plugins will need a `Multi-Arch: same`-relation as they only work with + the interpreter compiled for the same machine architecture as themselves whereas scripts are usually + less picky and can rely on the `Multi-Arch: foreign` relation. Packages wanting to rely on the + `Multi-Arch: foreign` interface must explicitly declare this adding a `:any` suffix to the package name + in the dependency relation (such as `Depends: python3:any`). However, the `:any` suffix cannot be used + unconditionally and should not be used unless you know you need it. + + Note that depending indirectly on a `Multi-Arch: allowed` package can require a `Architecture: all` + + `Multi-Arch: foreign` package to be converted to a `Architecture: any` package. This case is named + the "Multi-Arch interpreter problem", since it is commonly seen with script interpreters. However, + despite the name, it can happen to any kind of package. The bug [Debian#984701] is an example of + this happen in practice. 
+ + [Multi-Arch hinter]: https://wiki.debian.org/MultiArch/Hints + [Debian#984701]: https://bugs.debian.org/984701 + """ + ), + ), + DctrlKnownField( + "X-DH-Build-For-Type", + FieldValueClass.SINGLE_VALUE, + default_value="host", + known_values=_allowed_values( + Keyword( + "host", + hover_text="The package should be compiled for `DEB_HOST_TARGET` (the default).", + ), + Keyword( + "target", + hover_text="The package should be compiled for `DEB_TARGET_ARCH`.", + ), + ), + hover_text=textwrap.dedent( + """\ + **Special-purpose only**. *This field is a special purpose field and is rarely needed.* + *You are recommended to omit unless you know you need it or someone told you to use it.* + + This field is used when building a cross-compiling C-compiler (or similar cases), some packages need + to be build for target (DEB_**TARGET**_ARCH) rather than the host (DEB_**HOST**_ARCH) architecture. + + **Example**: + ``` + Package: gcc + Architecture: any + # ... + + Package: libgcc-s1 + Architecture: any + # When building a cross-compiling gcc, then this library needs to be built for the target architecture + # as binaries compiled by gcc will link with this library. + X-DH-Build-For-Type: target + # ... + ``` + + If you are in doubt, then you probably do **not** need this field. + """ + ), + ), + DctrlKnownField( + "X-Time64-Compat", + FieldValueClass.SINGLE_VALUE, + hover_text=textwrap.dedent( + """\ + Special purpose field renamed to the 64-bit time transition. + + It is used to inform packaging helpers what the original (non-transitioned) package name + was when the auto-detection is inadequate. The non-transitioned package name is then + conditionally provided in the `${t64:Provides}` substitution variable. + """ + ), + ), + DctrlKnownField( + "Homepage", + FieldValueClass.SINGLE_VALUE, + hover_text=textwrap.dedent( + """\ + Link to the upstream homepage for this binary package. 
+ + This field is rarely used in Package stanzas as most binary packages should have the + same homepage as the source package. Though, in the exceptional case where a particular + binary package should have a more specific homepage than the source package, you can + use this field to override the source package field. + ``` + """ + ), + ), + DctrlKnownField( + "Description", + FieldValueClass.FREE_TEXT_FIELD, + spellcheck_value=True, + # It will build just fine. But no one will know what it is for, so it probably won't be installed + missing_field_severity=DiagnosticSeverity.Warning, + hover_text=textwrap.dedent( + """\ + A human-readable description of the package. This field consists of two related but distinct parts. + + + The first line immediately after the field is called the *Synopsis* and is a short "noun-phrase" + intended to provide a one-line summary of the package. The lines after the **Synopsis** is known + as the **Extended Description** and is intended as a longer summary of the package. + + **Example:** + ``` + Description: documentation generator for Python projects + Sphinx is a tool for producing documentation for Python projects, using + reStructuredText as markup language. + . + Sphinx features: + * HTML, CHM, LaTeX output, + * Cross-referencing source code, + * Automatic indices, + * Code highlighting, using Pygments, + * Extensibility. Existing extensions: + - automatic testing of code snippets, + - including docstrings from Python modules. + . + Build-depend on sphinx if your package uses /usr/bin/sphinx-* + executables. Build-depend on python3-sphinx if your package uses + the Python API (for instance by calling python3 -m sphinx). + ``` + + The **Synopsis** is usually displayed in cases where there is limited space such as when reviewing + the search results from `apt search foo`. It is often a good idea to imagine that the **Synopsis** + part is inserted into a sentence like "The package provides {{Synopsis-goes-here}}". 
The + **Extended Description** is a standalone description that should describe what the package does and + how it relates to the rest of the system (in terms of, for example, which subsystem it is which part of). + Please see https://www.debian.org/doc/debian-policy/ch-controlfields.html#description for more details + about the description field and suggestions for how to write it. + """ + ), + ), + DctrlKnownField( + "XB-Cnf-Visible-Pkgname", + FieldValueClass.SINGLE_VALUE, + hover_text=textwrap.dedent( + """\ + **Special-case field**: *This field is only useful in very special circumstances.* + *Consider whether you truly need it before adding this field.* + + This field is used by `command-not-found` and can be used to override which package + `command-not-found` should propose the user to install. + + Normally, when `command-not-found` detects a missing command, it will suggest the + user to install the package name listed in the `Package` field. In most cases, this + is what you want. However, in certain special-cases, the binary is provided by a + minimal package for technical reasons (like `python3-minimal`) and the user should + really install a package that provides more features (such as `python3` to follow + the example). + + **Example**: + ``` + Package: python3-minimal + XB-Cnf-Visible-Pkgname: python3 + ``` + + Related bug: https://bugs.launchpad.net/ubuntu/+source/python-defaults/+bug/1867157 + """ + ), + ), + DctrlKnownField( + "X-DhRuby-Root", + FieldValueClass.SINGLE_VALUE, + hover_text=textwrap.dedent( + """\ + Used by `dh_ruby` to request "multi-binary" layout and where the root for the given + package is. + + Please refer to the documentation of `dh_ruby` for more details. 
# Known fields of the header stanza of a machine-readable debian/copyright
# file.  "Format" names the stanza and is reported as an error when missing.
_DEP5_HEADER_FIELDS = _fields(
    Deb822KnownField(
        "Format",
        FieldValueClass.SINGLE_VALUE,
        is_stanza_name=True,
        missing_field_severity=DiagnosticSeverity.Error,
    ),
    Deb822KnownField(
        "Upstream-Name",
        FieldValueClass.FREE_TEXT_FIELD,
    ),
    Deb822KnownField(
        # The copyright-format 1.0 specification calls this field
        # "Upstream-Contact" (it was previously misspelled "Upstream-Contract").
        "Upstream-Contact",
        FieldValueClass.FREE_TEXT_FIELD,
    ),
    Deb822KnownField(
        "Source",
        FieldValueClass.FREE_TEXT_FIELD,
    ),
    Deb822KnownField(
        "Disclaimer",
        FieldValueClass.FREE_TEXT_FIELD,
        spellcheck_value=True,
    ),
    Deb822KnownField(
        "Comment",
        FieldValueClass.FREE_TEXT_FIELD,
        spellcheck_value=True,
    ),
    Deb822KnownField(
        "License",
        FieldValueClass.FREE_TEXT_FIELD,
        # Do not tempt people to change legal text because the spellchecker wants to do a typo fix.
        spellcheck_value=False,
    ),
)

# Known fields of a "Files" stanza.  "Files", "Copyright" and "License" are
# all reported as errors when missing.
_DEP5_FILES_FIELDS = _fields(
    Deb822KnownField(
        "Files",
        FieldValueClass.DEP5_FILE_LIST,
        is_stanza_name=True,
        missing_field_severity=DiagnosticSeverity.Error,
    ),
    Deb822KnownField(
        "Copyright",
        FieldValueClass.FREE_TEXT_FIELD,
        # Mostly going to be names with very little free-text; high risk of false positives with low value
        spellcheck_value=False,
        missing_field_severity=DiagnosticSeverity.Error,
    ),
    Deb822KnownField(
        "License",
        FieldValueClass.FREE_TEXT_FIELD,
        missing_field_severity=DiagnosticSeverity.Error,
        # Do not tempt people to change legal text because the spellchecker wants to do a typo fix.
        spellcheck_value=False,
    ),
    Deb822KnownField(
        "Comment",
        FieldValueClass.FREE_TEXT_FIELD,
        spellcheck_value=True,
    ),
)

# Known fields of a stand-alone "License" stanza.
_DEP5_LICENSE_FIELDS = _fields(
    Deb822KnownField(
        "License",
        FieldValueClass.FREE_TEXT_FIELD,
        is_stanza_name=True,
        # Do not tempt people to change legal text because the spellchecker wants to do a typo fix.
        spellcheck_value=False,
        missing_field_severity=DiagnosticSeverity.Error,
    ),
    Deb822KnownField(
        "Comment",
        FieldValueClass.FREE_TEXT_FIELD,
        spellcheck_value=True,
    ),
)
@dataclasses.dataclass(slots=True, frozen=True)
class StanzaMetadata(Mapping[str, F], Generic[F], ABC):
    """Read-only mapping from field name to field metadata for one stanza type.

    A lookup lower-cases the key and passes it through
    ``normalize_dctrl_field_name`` before consulting ``stanza_fields``.
    Length and iteration are delegated to ``stanza_fields`` unchanged.
    """

    # Label of the stanza type, e.g. "Source".
    stanza_type_name: str
    # Backing table that lookups, len() and iteration are served from.
    stanza_fields: Mapping[str, F]

    def stanza_diagnostics(
        self,
        stanza: Deb822ParagraphElement,
        stanza_position_in_file: "TEPosition",
    ) -> Iterable[Diagnostic]:
        """Produce stanza-level diagnostics; the base class has no implementation.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError

    def __getitem__(self, key: str) -> F:
        # Normalisation is applied to the already lower-cased key.
        return self.stanza_fields[normalize_dctrl_field_name(key.lower())]

    def __len__(self) -> int:
        return len(self.stanza_fields)

    def __iter__(self):
        return iter(self.stanza_fields)


@dataclasses.dataclass(slots=True, frozen=True)
class Dep5StanzaMetadata(StanzaMetadata[Deb822KnownField]):
    """Stanza metadata whose fields are ``Deb822KnownField`` instances."""

    def stanza_diagnostics(
        self,
        stanza: Deb822ParagraphElement,
        stanza_position_in_file: "TEPosition",
    ) -> Iterable[Diagnostic]:
        # Stub: no stanza-level checks yet.  NOTE(review): this returns None
        # rather than an empty iterable, despite the annotation.
        return None


@dataclasses.dataclass(slots=True, frozen=True)
class DctrlStanzaMetadata(StanzaMetadata[DctrlKnownField]):
    """Stanza metadata whose fields are ``DctrlKnownField`` instances."""

    def stanza_diagnostics(
        self,
        stanza: Deb822ParagraphElement,
        stanza_position_in_file: "TEPosition",
    ) -> Iterable[Diagnostic]:
        # Stub: no stanza-level checks yet.  NOTE(review): this returns None
        # rather than an empty iterable, despite the annotation.
        return None


class Deb822FileMetadata(Generic[S]):
    """Interface describing which stanza types a deb822-style file contains.

    Subclasses supply the index-based guess, the list of stanza types and
    the name-based lookup; ``classify_stanza`` and ``get`` are built on those.
    """

    def classify_stanza(self, stanza: Deb822ParagraphElement, stanza_idx: int) -> S:
        """Classify *stanza*; by default only its index in the file is used."""
        return self.guess_stanza_classification_by_idx(stanza_idx)

    def guess_stanza_classification_by_idx(self, stanza_idx: int) -> S:
        """Return the stanza type expected at position *stanza_idx*."""
        raise NotImplementedError

    def stanza_types(self) -> Iterable[S]:
        """Return every stanza type this kind of file can contain."""
        raise NotImplementedError

    def __getitem__(self, item: str) -> S:
        """Look a stanza type up by name."""
        raise NotImplementedError

    def file_diagnostics(
        self,
        file: Deb822FileElement,
    ) -> Iterable[Diagnostic]:
        """Produce file-level diagnostics; the base class has no implementation."""
        raise NotImplementedError

    def get(self, item: str) -> Optional[S]:
        """Like ``self[item]``, but return ``None`` when it raises ``KeyError``."""
        try:
            found = self[item]
        except KeyError:
            return None
        return found


_DCTRL_SOURCE_STANZA = DctrlStanzaMetadata(
    "Source",
    SOURCE_FIELDS,
)
_DCTRL_PACKAGE_STANZA = DctrlStanzaMetadata("Package", BINARY_FIELDS)

_DEP5_HEADER_STANZA = Dep5StanzaMetadata(
    "Header",
    _DEP5_HEADER_FIELDS,
)
_DEP5_FILES_STANZA = Dep5StanzaMetadata(
    "Files",
    _DEP5_FILES_FIELDS,
)
_DEP5_LICENSE_STANZA = Dep5StanzaMetadata(
    "License",
    _DEP5_LICENSE_FIELDS,
)


class Dep5FileMetadata(Deb822FileMetadata[Dep5StanzaMetadata]):
    """Stanza layout of a machine-readable debian/copyright file.

    Stanza 0 is the header.  Any later stanza is a "Files" stanza when it
    has a ``Files`` field and a stand-alone "License" stanza otherwise.
    """

    def classify_stanza(
        self, stanza: Deb822ParagraphElement, stanza_idx: int
    ) -> Dep5StanzaMetadata:
        """Classify *stanza* from its index and whether it has a ``Files`` field.

        :raises ValueError: if *stanza_idx* is negative.
        """
        if stanza_idx == 0:
            return _DEP5_HEADER_STANZA
        if stanza_idx > 0:
            if "Files" in stanza:
                return _DEP5_FILES_STANZA
            return _DEP5_LICENSE_STANZA
        raise ValueError("The stanza_idx must be 0 or greater")

    def guess_stanza_classification_by_idx(self, stanza_idx: int) -> Dep5StanzaMetadata:
        """Guess the stanza type from the index alone.

        Without the stanza content, every non-header stanza is assumed to be
        a "Files" stanza.

        :raises ValueError: if *stanza_idx* is negative.
        """
        if stanza_idx == 0:
            return _DEP5_HEADER_STANZA
        if stanza_idx > 0:
            return _DEP5_FILES_STANZA
        raise ValueError("The stanza_idx must be 0 or greater")

    def stanza_types(self) -> Iterable[Dep5StanzaMetadata]:
        """Yield the header, files and license stanza types, in that order."""
        yield _DEP5_HEADER_STANZA
        yield _DEP5_FILES_STANZA
        yield _DEP5_LICENSE_STANZA

    def __getitem__(self, item: str) -> Dep5StanzaMetadata:
        """Return the stanza type named *item* ("Header", "Files" or "License").

        :raises KeyError: for any other name.
        """
        if item == "Header":
            # Previously returned the "Files" stanza by mistake.
            return _DEP5_HEADER_STANZA
        if item == "Files":
            return _DEP5_FILES_STANZA
        if item == "License":
            return _DEP5_LICENSE_STANZA
        raise KeyError(item)


class DctrlFileMetadata(Deb822FileMetadata[DctrlStanzaMetadata]):
    """Stanza layout of a file with one "Source" stanza followed by "Package" stanzas."""

    def guess_stanza_classification_by_idx(self, stanza_idx: int) -> DctrlStanzaMetadata:
        """Return the "Source" stanza type for index 0 and "Package" for later ones.

        :raises ValueError: if *stanza_idx* is negative.
        """
        if stanza_idx == 0:
            return _DCTRL_SOURCE_STANZA
        if stanza_idx > 0:
            return _DCTRL_PACKAGE_STANZA
        raise ValueError("The stanza_idx must be 0 or greater")

    def stanza_types(self) -> Iterable[DctrlStanzaMetadata]:
        """Yield the source and package stanza types, in that order."""
        yield _DCTRL_SOURCE_STANZA
        yield _DCTRL_PACKAGE_STANZA

    def __getitem__(self, item: str) -> DctrlStanzaMetadata:
        """Return the stanza type named *item* ("Source" or "Package").

        :raises KeyError: for any other name.
        """
        if item == "Source":
            return _DCTRL_SOURCE_STANZA
        if item == "Package":
            return _DCTRL_PACKAGE_STANZA
        raise KeyError(item)
100644 index 0000000..052654a --- /dev/null +++ b/src/debputy/lsp/lsp_debian_copyright.py @@ -0,0 +1,685 @@ +import re +from typing import ( + Union, + Sequence, + Tuple, + Iterator, + Optional, + Iterable, + Mapping, + List, +) + +from debputy.lsp.vendoring._deb822_repro import ( + parse_deb822_file, + Deb822FileElement, + Deb822ParagraphElement, +) +from debputy.lsp.vendoring._deb822_repro.parsing import ( + Deb822KeyValuePairElement, + LIST_SPACE_SEPARATED_INTERPRETATION, +) +from debputy.lsp.vendoring._deb822_repro.tokens import ( + Deb822Token, + tokenize_deb822_file, + Deb822FieldNameToken, +) +from lsprotocol.types import ( + DiagnosticSeverity, + Range, + Diagnostic, + Position, + DidOpenTextDocumentParams, + DidChangeTextDocumentParams, + FoldingRangeKind, + FoldingRange, + FoldingRangeParams, + CompletionItem, + CompletionList, + CompletionParams, + TEXT_DOCUMENT_DID_OPEN, + TEXT_DOCUMENT_DID_CHANGE, + TEXT_DOCUMENT_FOLDING_RANGE, + TEXT_DOCUMENT_COMPLETION, + TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL, + DiagnosticRelatedInformation, + Location, + TEXT_DOCUMENT_HOVER, + HoverParams, + Hover, + TEXT_DOCUMENT_CODE_ACTION, + DiagnosticTag, + SemanticTokensLegend, + TEXT_DOCUMENT_SEMANTIC_TOKENS_FULL, + SemanticTokens, + SemanticTokensParams, +) + +from debputy.lsp.lsp_debian_control_reference_data import ( + FieldValueClass, + _DEP5_HEADER_FIELDS, + _DEP5_FILES_FIELDS, + Deb822KnownField, + _DEP5_LICENSE_FIELDS, + Dep5FileMetadata, +) +from debputy.lsp.lsp_features import ( + lint_diagnostics, + lsp_completer, + lsp_hover, + lsp_standard_handler, +) +from debputy.lsp.lsp_generic_deb822 import deb822_completer, deb822_hover +from debputy.lsp.quickfixes import ( + propose_remove_line_quick_fix, + propose_correct_text_quick_fix, + provide_standard_quickfixes_from_diagnostics, +) +from debputy.lsp.spellchecking import default_spellchecker +from debputy.lsp.text_util import ( + on_save_trim_end_of_line_whitespace, + normalize_dctrl_field_name, + 
LintCapablePositionCodec, + detect_possible_typo, + te_range_to_lsp, +) +from debputy.util import _info, _error + +try: + from debputy.lsp.vendoring._deb822_repro.locatable import ( + Position as TEPosition, + Range as TERange, + START_POSITION, + ) + + from pygls.server import LanguageServer + from pygls.workspace import TextDocument +except ImportError: + pass + + +_CONTAINS_SPACE_OR_COLON = re.compile(r"[\s:]") +_LANGUAGE_IDS = [ + "debian/copyright", + # emacs's name + "debian-copyright", + # vim's name + "debcopyright", +] + +_DEP5_FILE_METADATA = Dep5FileMetadata() + +SEMANTIC_TOKENS_LEGEND = SemanticTokensLegend( + token_types=["keyword"], + token_modifiers=[], +) + + +def register_dcpy_lsp(ls: "LanguageServer") -> None: + try: + from debian._deb822_repro.locatable import Locatable + except ImportError: + _error( + 'Sorry; this feature requires a newer version of python-debian (with "Locatable").' + ) + + ls.feature(TEXT_DOCUMENT_DID_OPEN)(_diagnostics_debian_copyright) + ls.feature(TEXT_DOCUMENT_DID_CHANGE)(_diagnostics_debian_copyright) + ls.feature(TEXT_DOCUMENT_FOLDING_RANGE)(_detect_folding_ranges_debian_copyright) + ls.feature(TEXT_DOCUMENT_COMPLETION)(_debian_copyright_completions) + ls.feature(TEXT_DOCUMENT_CODE_ACTION)(provide_standard_quickfixes_from_diagnostics) + ls.feature(TEXT_DOCUMENT_HOVER)(_debian_copyright_hover) + ls.feature(TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL)(on_save_trim_end_of_line_whitespace) + ls.feature(TEXT_DOCUMENT_SEMANTIC_TOKENS_FULL, SEMANTIC_TOKENS_LEGEND)( + _handle_semantic_tokens_full + ) + + +lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_CODE_ACTION) +lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL) + + +@lsp_hover(_LANGUAGE_IDS) +def _debian_copyright_hover( + ls: "LanguageServer", + params: HoverParams, +) -> Optional[Hover]: + return deb822_hover(ls, params, _DEP5_FILE_METADATA) + + +@lsp_completer(_LANGUAGE_IDS) +def _debian_copyright_completions( + ls: "LanguageServer", + params: 
def _detect_folding_ranges_debian_copyright(
    ls: "LanguageServer",
    params: "FoldingRangeParams",
) -> Optional[Sequence["FoldingRange"]]:
    """Return one comment folding range per run of consecutive comment tokens.

    A range opens at the start line of the first comment token of a run and
    is emitted when the first non-comment token after the run is seen, using
    that token's end line as the end of the range.

    :param ls: Language server; used to fetch the document text.
    :param params: Request parameters naming the document.
    :return: The folding ranges found, in document order.
    """
    doc = ls.workspace.get_text_document(params.text_document.uri)
    comment_start = -1  # -1 means "not currently inside a comment run"
    folding_ranges = []
    for (
        token,
        start_line,
        start_offset,
        end_line,
        end_offset,
    ) in _deb822_token_iter(tokenize_deb822_file(doc.lines)):
        if token.is_comment:
            if comment_start < 0:
                comment_start = start_line
                _info(f"Detected new comment: {start_line}")
        elif comment_start > -1:
            # Build the range BEFORE resetting the marker; resetting first
            # made every range start at line -1.
            folding_range = FoldingRange(
                comment_start,
                end_line,
                kind=FoldingRangeKind.Comment,
            )
            comment_start = -1

            folding_ranges.append(folding_range)
            _info(f"Detected folding range: {folding_range}")

    return folding_ranges


def _deb822_token_iter(
    tokens: Iterable["Deb822Token"],
) -> Iterator[Tuple["Deb822Token", int, int, int, int]]:
    """Annotate each token with its start and end position in the text.

    Positions are derived purely from ``token.text``, assuming the tokens
    are consecutive pieces of one text starting at line 0, column 0.

    :param tokens: Tokens in document order.
    :return: ``(token, start_line, start_offset, end_line, end_offset)``
      tuples; lines and offsets are 0-based and the end offset is exclusive.
    """
    line_no = 0
    line_offset = 0

    for token in tokens:
        start_line = line_no
        start_line_offset = line_offset

        text = token.text
        newlines = text.count("\n")
        line_no += newlines
        if newlines:
            # Characters after the last newline; 0 when the token ends with
            # "\n".  The former "- 2" made the offset one too small, so it
            # disagreed with the no-newline branch below.
            line_offset = len(text) - text.rindex("\n") - 1
        else:
            line_offset += len(text)

        yield token, start_line, start_line_offset, line_no, line_offset


def _paragraph_representation_field(
    paragraph: "Deb822ParagraphElement",
) -> "Deb822KeyValuePairElement":
    """Return the first key/value pair of *paragraph*.

    Raises ``StopIteration`` if the paragraph yields no key/value pair.
    """
    return next(iter(paragraph.iter_parts_of_type(Deb822KeyValuePairElement)))


def _extract_first_value_and_position(
    kvpair: "Deb822KeyValuePairElement",
    stanza_pos: "TEPosition",
    position_codec: "LintCapablePositionCodec",
    lines: List[str],
) -> Tuple[Optional[str], Optional["Range"]]:
    """Return the first space-separated value of *kvpair* and its client range.

    :param kvpair: Field to read, interpreted as a space-separated list.
    :param stanza_pos: Position of the enclosing stanza; the field position
      is resolved relative to it.
    :param position_codec: Converts server-unit ranges to client units.
    :param lines: Document lines, passed to the codec.
    :return: ``(value, range)`` for the first value, or ``(None, None)`` if
      the field holds no value.
    """
    kvpair_pos = kvpair.position_in_parent().relative_to(stanza_pos)
    value_element_pos = kvpair.value_element.position_in_parent().relative_to(
        kvpair_pos
    )
    for value_ref in kvpair.interpret_as(
        LIST_SPACE_SEPARATED_INTERPRETATION
    ).iter_value_references():
        v = value_ref.value
        section_value_loc = value_ref.locatable
        value_range_te = section_value_loc.range_in_parent().relative_to(
            value_element_pos
        )
        section_range_server_units = te_range_to_lsp(value_range_te)
        section_range = position_codec.range_to_client_units(
            lines, section_range_server_units
        )
        # Only the first value is of interest.
        return v, section_range
    return None, None
def _diagnostics_for_paragraph(
    stanza: Deb822ParagraphElement,
    stanza_position: "TEPosition",
    known_fields: Mapping[str, Deb822KnownField],
    other_known_fields: Mapping[str, Deb822KnownField],
    is_files_or_license_paragraph: bool,
    doc_reference: str,
    position_codec: "LintCapablePositionCodec",
    lines: List[str],
    diagnostics: List[Diagnostic],
) -> None:
    """Append all field-level diagnostics for one stanza to *diagnostics*.

    Checks performed, in order: required fields that are missing, field
    names that look like typos, fields that belong in the other stanza
    kind, empty values, per-field diagnostics, spelling of spellchecked
    values, redundant default values and duplicated fields.

    :param stanza: The stanza to check.
    :param stanza_position: Position of the stanza; field positions are
      resolved relative to it.
    :param known_fields: Fields valid in this stanza, keyed by normalized
      lower-case name.
    :param other_known_fields: Fields valid in the other stanza kind; used
      only for the "wrong stanza" diagnostic.
    :param is_files_or_license_paragraph: False for the header stanza.
    :param doc_reference: Document reference used in related-information
      locations.
    :param position_codec: Converts server-unit ranges to client units.
    :param lines: Document lines, passed to the codec.
    :param diagnostics: Output list; mutated in place.
    """
    representation_field = _paragraph_representation_field(stanza)
    representation_field_pos = representation_field.position_in_parent().relative_to(
        stanza_position
    )
    representation_field_range_server_units = te_range_to_lsp(
        TERange.from_position_and_size(
            representation_field_pos, representation_field.size()
        )
    )
    representation_field_range = position_codec.range_to_client_units(
        lines,
        representation_field_range_server_units,
    )
    # Missing fields are reported on the stanza's first field.
    for known_field in known_fields.values():
        missing_field_severity = known_field.missing_field_severity
        if missing_field_severity is None or known_field.name in stanza:
            continue

        diagnostics.append(
            Diagnostic(
                representation_field_range,
                f"Stanza is missing field {known_field.name}",
                severity=missing_field_severity,
                source="debputy",
            )
        )

    # normalized lower-case field name ->
    #   (name as first written, normalized name, range of first use, duplicate ranges)
    seen_fields = {}

    for kvpair in stanza.iter_parts_of_type(Deb822KeyValuePairElement):
        field_name_token = kvpair.field_token
        field_name = field_name_token.text
        field_name_lc = field_name.lower()
        normalized_field_name_lc = normalize_dctrl_field_name(field_name_lc)
        known_field = known_fields.get(normalized_field_name_lc)
        field_value = stanza[field_name]
        field_range_te = kvpair.range_in_parent().relative_to(stanza_position)
        field_position_te = field_range_te.start_pos
        field_range_server_units = te_range_to_lsp(field_range_te)
        field_range = position_codec.range_to_client_units(
            lines,
            field_range_server_units,
        )
        field_name_typo_detected = False
        existing_field_range = seen_fields.get(normalized_field_name_lc)
        if existing_field_range is not None:
            existing_field_range[3].append(field_range)
        else:
            normalized_field_name = normalize_dctrl_field_name(field_name)
            # Store under the same key the lookup above uses.  The raw
            # lower-case name was used before, so two spellings that
            # normalize to the same field were never seen as duplicates.
            seen_fields[normalized_field_name_lc] = (
                field_name,
                normalized_field_name,
                field_range,
                [],
            )

        if known_field is None:
            candidates = detect_possible_typo(normalized_field_name_lc, known_fields)
            if candidates:
                # Treat the field as the best candidate for the remaining
                # checks and narrow the range to the field name itself.
                known_field = known_fields[candidates[0]]
                token_range_server_units = te_range_to_lsp(
                    TERange.from_position_and_size(
                        field_position_te, kvpair.field_token.size()
                    )
                )
                field_range = position_codec.range_to_client_units(
                    lines,
                    token_range_server_units,
                )
                field_name_typo_detected = True
                diagnostics.append(
                    Diagnostic(
                        field_range,
                        f'The "{field_name}" looks like a typo of "{known_field.name}".',
                        severity=DiagnosticSeverity.Warning,
                        source="debputy",
                        data=[
                            propose_correct_text_quick_fix(known_fields[m].name)
                            for m in candidates
                        ],
                    )
                )
        if known_field is None:
            # Unknown here; perhaps it belongs to the other stanza kind.
            known_else_where = other_known_fields.get(normalized_field_name_lc)
            if known_else_where is not None:
                intended_usage = (
                    "Header" if is_files_or_license_paragraph else "Files/License"
                )
                diagnostics.append(
                    Diagnostic(
                        field_range,
                        f'The {field_name} is defined for use in the "{intended_usage}" stanza.'
                        f" Please move it to the right place or remove it",
                        severity=DiagnosticSeverity.Error,
                        source="debputy",
                    )
                )
            continue

        if field_value.strip() == "":
            diagnostics.append(
                Diagnostic(
                    field_range,
                    f"The {field_name} has no value. Either provide a value or remove it.",
                    severity=DiagnosticSeverity.Error,
                    source="debputy",
                )
            )
            continue
        diagnostics.extend(
            known_field.field_diagnostics(
                kvpair,
                stanza_position,
                position_codec,
                lines,
                field_name_typo_reported=field_name_typo_detected,
            )
        )
        if known_field.spellcheck_value:
            words = kvpair.interpret_as(LIST_SPACE_SEPARATED_INTERPRETATION)
            spell_checker = default_spellchecker()
            value_position = kvpair.value_element.position_in_parent().relative_to(
                field_position_te
            )
            for word_ref in words.iter_value_references():
                token = word_ref.value
                for word, pos, endpos in spell_checker.iter_words(token):
                    corrections = spell_checker.provide_corrections_for(word)
                    if not corrections:
                        continue
                    word_loc = word_ref.locatable
                    word_pos_te = word_loc.position_in_parent().relative_to(
                        value_position
                    )
                    if pos:
                        # The word starts part-way into the token.
                        word_pos_te = TEPosition(0, pos).relative_to(word_pos_te)
                    word_range = TERange(
                        START_POSITION,
                        TEPosition(0, endpos - pos),
                    )
                    word_range_server_units = te_range_to_lsp(
                        TERange.from_position_and_size(word_pos_te, word_range)
                    )
                    word_range = position_codec.range_to_client_units(
                        lines,
                        word_range_server_units,
                    )
                    diagnostics.append(
                        Diagnostic(
                            word_range,
                            f'Spelling "{word}"',
                            severity=DiagnosticSeverity.Hint,
                            source="debputy",
                            data=[
                                propose_correct_text_quick_fix(c) for c in corrections
                            ],
                        )
                    )
        if known_field.warn_if_default and field_value == known_field.default_value:
            diagnostics.append(
                Diagnostic(
                    field_range,
                    f"The {field_name} is redundant as it is set to the default value and the field should only be"
                    " used in exceptional cases.",
                    severity=DiagnosticSeverity.Warning,
                    source="debputy",
                )
            )
    # One error per duplicate, each linking to the first definition and to
    # every duplicate.
    for (
        field_name,
        normalized_field_name,
        field_range,
        duplicates,
    ) in seen_fields.values():
        if not duplicates:
            continue
        related_information = [
            DiagnosticRelatedInformation(
                location=Location(doc_reference, field_range),
                message=f"First definition of {field_name}",
            )
        ]
        related_information.extend(
            DiagnosticRelatedInformation(
                location=Location(doc_reference, r),
                message=f"Duplicate of {field_name}",
            )
            for r in duplicates
        )
        for dup_range in duplicates:
            diagnostics.append(
                Diagnostic(
                    dup_range,
                    f"The {normalized_field_name} field name was used multiple times in this stanza."
                    f" Please ensure the field is only used once per stanza. Note that {normalized_field_name} and"
                    f" X[BCS]-{normalized_field_name} are considered the same field.",
                    severity=DiagnosticSeverity.Error,
                    source="debputy",
                    related_information=related_information,
                )
            )


def _diagnostics_for_field_name(
    token: Deb822FieldNameToken,
    token_position: "TEPosition",
    known_field: Deb822KnownField,
    typo_detected: bool,
    position_codec: "LintCapablePositionCodec",
    lines: List[str],
    diagnostics: List[Diagnostic],
) -> None:
    """Append diagnostics about how a field name is written.

    Reports deprecated fields (with or without a replacement) and, unless a
    typo was already reported, spellings that differ from the canonical name.

    :param token: The field name token.
    :param token_position: Position of the token, in server units.
    :param known_field: Metadata of the field the token refers to.
    :param typo_detected: True if a typo diagnostic was already issued.
    :param position_codec: Converts server-unit ranges to client units.
    :param lines: Document lines, passed to the codec.
    :param diagnostics: Output list; mutated in place.
    """
    field_name = token.text
    # Defeat the case-insensitivity from python-debian
    field_name_cased = str(field_name)
    token_range_server_units = te_range_to_lsp(
        TERange.from_position_and_size(token_position, token.size())
    )
    token_range = position_codec.range_to_client_units(
        lines,
        token_range_server_units,
    )
    if known_field.deprecated_with_no_replacement:
        diagnostics.append(
            Diagnostic(
                token_range,
                f"{field_name_cased} is deprecated and no longer used",
                severity=DiagnosticSeverity.Warning,
                source="debputy",
                tags=[DiagnosticTag.Deprecated],
                data=propose_remove_line_quick_fix(),
            )
        )
    elif known_field.replaced_by is not None:
        diagnostics.append(
            Diagnostic(
                token_range,
                f"{field_name_cased} is a deprecated name for {known_field.replaced_by}",
                severity=DiagnosticSeverity.Warning,
                source="debputy",
                tags=[DiagnosticTag.Deprecated],
                data=propose_correct_text_quick_fix(known_field.replaced_by),
            )
        )

    if not typo_detected and field_name_cased != known_field.name:
        diagnostics.append(
            Diagnostic(
                token_range,
                f"Non-canonical spelling of {known_field.name}",
                severity=DiagnosticSeverity.Information,
                source="debputy",
                data=propose_correct_text_quick_fix(known_field.name),
            )
        )
severity=DiagnosticSeverity.Warning, + source="debputy", + tags=[DiagnosticTag.Deprecated], + data=propose_correct_text_quick_fix(known_field.replaced_by), + ) + ) + + if not typo_detected and field_name_cased != known_field.name: + diagnostics.append( + Diagnostic( + token_range, + f"Non-canonical spelling of {known_field.name}", + severity=DiagnosticSeverity.Information, + source="debputy", + data=propose_correct_text_quick_fix(known_field.name), + ) + ) + + +def _scan_for_syntax_errors_and_token_level_diagnostics( + deb822_file: Deb822FileElement, + position_codec: LintCapablePositionCodec, + lines: List[str], + diagnostics: List[Diagnostic], +) -> int: + first_error = len(lines) + 1 + spell_checker = default_spellchecker() + for ( + token, + start_line, + start_offset, + end_line, + end_offset, + ) in _deb822_token_iter(deb822_file.iter_tokens()): + if token.is_error: + first_error = min(first_error, start_line) + start_pos = Position( + start_line, + start_offset, + ) + end_pos = Position( + end_line, + end_offset, + ) + token_range = position_codec.range_to_client_units( + lines, Range(start_pos, end_pos) + ) + diagnostics.append( + Diagnostic( + token_range, + "Syntax error", + severity=DiagnosticSeverity.Error, + source="debputy (python-debian parser)", + ) + ) + elif token.is_comment: + for word, pos, end_pos in spell_checker.iter_words(token.text): + corrections = spell_checker.provide_corrections_for(word) + if not corrections: + continue + start_pos = Position( + start_line, + pos, + ) + end_pos = Position( + start_line, + end_pos, + ) + word_range = position_codec.range_to_client_units( + lines, Range(start_pos, end_pos) + ) + diagnostics.append( + Diagnostic( + word_range, + f'Spelling "{word}"', + severity=DiagnosticSeverity.Hint, + source="debputy", + data=[propose_correct_text_quick_fix(c) for c in corrections], + ) + ) + return first_error + + +def _diagnostics_debian_copyright( + ls: "LanguageServer", + params: Union[DidOpenTextDocumentParams, 
DidChangeTextDocumentParams], +) -> None: + doc = ls.workspace.get_text_document(params.text_document.uri) + _info(f"Opened document: {doc.path} ({doc.language_id})") + lines = doc.lines + position_codec: LintCapablePositionCodec = doc.position_codec + + diagnostics = _lint_debian_copyright(doc.uri, doc.path, lines, position_codec) + ls.publish_diagnostics( + doc.uri, + diagnostics, + ) + + +@lint_diagnostics(_LANGUAGE_IDS) +def _lint_debian_copyright( + doc_reference: str, + _path: str, + lines: List[str], + position_codec: LintCapablePositionCodec, +) -> Optional[List[Diagnostic]]: + diagnostics = [] + deb822_file = parse_deb822_file( + lines, + accept_files_with_duplicated_fields=True, + accept_files_with_error_tokens=True, + ) + + first_error = _scan_for_syntax_errors_and_token_level_diagnostics( + deb822_file, + position_codec, + lines, + diagnostics, + ) + + paragraphs = list(deb822_file) + is_dep5 = False + + for paragraph_no, paragraph in enumerate(paragraphs, start=1): + paragraph_pos = paragraph.position_in_file() + if paragraph_pos.line_position >= first_error: + break + is_files_or_license_paragraph = paragraph_no != 1 + if is_files_or_license_paragraph: + known_fields = ( + _DEP5_FILES_FIELDS if "Files" in paragraph else _DEP5_LICENSE_FIELDS + ) + other_known_fields = _DEP5_HEADER_FIELDS + elif "Format" in paragraph: + is_dep5 = True + known_fields = _DEP5_HEADER_FIELDS + other_known_fields = _DEP5_FILES_FIELDS + else: + break + _diagnostics_for_paragraph( + paragraph, + paragraph_pos, + known_fields, + other_known_fields, + is_files_or_license_paragraph, + doc_reference, + position_codec, + lines, + diagnostics, + ) + if not is_dep5: + return None + return diagnostics + + +def _handle_semantic_tokens_full( + ls: "LanguageServer", + request: SemanticTokensParams, +) -> Optional[SemanticTokens]: + doc = ls.workspace.get_text_document(request.text_document.uri) + lines = doc.lines + deb822_file = parse_deb822_file( + lines, + 
accept_files_with_duplicated_fields=True, + accept_files_with_error_tokens=True, + ) + tokens = [] + previous_line = 0 + keyword_token = 0 + no_modifiers = 0 + + for paragraph_no, paragraph in enumerate(deb822_file, start=1): + paragraph_position = paragraph.position_in_file() + for kvpair in paragraph.iter_parts_of_type(Deb822KeyValuePairElement): + field_position_without_comments = kvpair.position_in_parent().relative_to( + paragraph_position + ) + field_size = doc.position_codec.client_num_units(kvpair.field_name) + current_line = field_position_without_comments.line_position + line_delta = current_line - previous_line + previous_line = current_line + tokens.append(line_delta) # Line delta + tokens.append(0) # Token delta + tokens.append(field_size) # Token length + tokens.append(keyword_token) + tokens.append(no_modifiers) + + if not tokens: + return None + return SemanticTokens(tokens) diff --git a/src/debputy/lsp/lsp_debian_debputy_manifest.py b/src/debputy/lsp/lsp_debian_debputy_manifest.py new file mode 100644 index 0000000..2f9920e --- /dev/null +++ b/src/debputy/lsp/lsp_debian_debputy_manifest.py @@ -0,0 +1,111 @@ +import re +from typing import ( + Optional, + List, +) + +from lsprotocol.types import ( + Diagnostic, + TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL, + Position, + Range, + DiagnosticSeverity, +) +from ruamel.yaml.error import MarkedYAMLError, YAMLError + +from debputy.highlevel_manifest import MANIFEST_YAML +from debputy.lsp.lsp_features import ( + lint_diagnostics, + lsp_standard_handler, +) +from debputy.lsp.text_util import ( + LintCapablePositionCodec, +) + +try: + from pygls.server import LanguageServer +except ImportError: + pass + + +_CONTAINS_TAB_OR_COLON = re.compile(r"[\t:]") +_WORDS_RE = re.compile("([a-zA-Z0-9_-]+)") +_MAKE_ERROR_RE = re.compile(r"^[^:]+:(\d+):\s*(\S.+)") + + +_LANGUAGE_IDS = [ + "debian/debputy.manifest", + "debputy.manifest", + # LSP's official language ID for YAML files + "yaml", +] + + +# 
lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_CODE_ACTION) +lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL) + + +def _word_range_at_position( + lines: List[str], + line_no: int, + char_offset: int, +) -> Range: + line = lines[line_no] + line_len = len(line) + start_idx = char_offset + end_idx = char_offset + while end_idx + 1 < line_len and not line[end_idx + 1].isspace(): + end_idx += 1 + + while start_idx - 1 >= 0 and not line[start_idx - 1].isspace(): + start_idx -= 1 + + return Range( + Position(line_no, start_idx), + Position(line_no, end_idx), + ) + + +@lint_diagnostics(_LANGUAGE_IDS) +def _lint_debian_debputy_manifest( + _doc_reference: str, + _path: str, + lines: List[str], + position_codec: LintCapablePositionCodec, +) -> Optional[List[Diagnostic]]: + diagnostics = [] + try: + MANIFEST_YAML.load("".join(lines)) + except MarkedYAMLError as e: + error_range = position_codec.range_to_client_units( + lines, + _word_range_at_position( + lines, + e.problem_mark.line, + e.problem_mark.column, + ), + ) + diagnostics.append( + Diagnostic( + error_range, + f"YAML parse error: {e}", + DiagnosticSeverity.Error, + ), + ) + except YAMLError as e: + error_range = position_codec.range_to_client_units( + lines, + Range( + Position(0, 0), + Position(0, len(lines[0])), + ), + ) + diagnostics.append( + Diagnostic( + error_range, + f"Unknown YAML parse error: {e} [{e!r}]", + DiagnosticSeverity.Error, + ), + ) + + return diagnostics diff --git a/src/debputy/lsp/lsp_debian_rules.py b/src/debputy/lsp/lsp_debian_rules.py new file mode 100644 index 0000000..7f0e5fb --- /dev/null +++ b/src/debputy/lsp/lsp_debian_rules.py @@ -0,0 +1,384 @@ +import itertools +import json +import os +import re +import subprocess +from typing import ( + Union, + Sequence, + Optional, + Iterable, + List, + Iterator, + Tuple, +) + +from lsprotocol.types import ( + CompletionItem, + DidOpenTextDocumentParams, + DidChangeTextDocumentParams, + Diagnostic, + Range, + Position, + 
DiagnosticSeverity, + CompletionList, + CompletionParams, + TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL, + TEXT_DOCUMENT_CODE_ACTION, +) + +from debputy.lsp.lsp_features import ( + lint_diagnostics, + lsp_standard_handler, + lsp_completer, +) +from debputy.lsp.quickfixes import propose_correct_text_quick_fix +from debputy.lsp.spellchecking import spellcheck_line +from debputy.lsp.text_util import ( + LintCapablePositionCodec, +) +from debputy.util import _warn + +try: + from debian._deb822_repro.locatable import ( + Position as TEPosition, + Range as TERange, + START_POSITION, + ) + + from pygls.server import LanguageServer + from pygls.workspace import TextDocument +except ImportError: + pass + + +try: + from Levenshtein import distance +except ImportError: + + def _detect_possible_typo( + provided_value: str, + known_values: Iterable[str], + ) -> Sequence[str]: + return tuple() + +else: + + def _detect_possible_typo( + provided_value: str, + known_values: Iterable[str], + ) -> Sequence[str]: + k_len = len(provided_value) + candidates = [] + for known_value in known_values: + if abs(k_len - len(known_value)) > 2: + continue + d = distance(provided_value, known_value) + if d > 2: + continue + candidates.append(known_value) + return candidates + + +_CONTAINS_TAB_OR_COLON = re.compile(r"[\t:]") +_WORDS_RE = re.compile("([a-zA-Z0-9_-]+)") +_MAKE_ERROR_RE = re.compile(r"^[^:]+:(\d+):\s*(\S.+)") + + +_KNOWN_TARGETS = { + "binary", + "binary-arch", + "binary-indep", + "build", + "build-arch", + "build-indep", + "clean", +} + +_COMMAND_WORDS = frozenset( + { + "export", + "ifeq", + "ifneq", + "ifdef", + "ifndef", + "endif", + "else", + } +) + +_LANGUAGE_IDS = [ + "debian/rules", + # LSP's official language ID for Makefile + "makefile", + # emacs's name (there is no debian-rules mode) + "makefile-gmake", + # vim's name (there is no debrules) + "make", +] + + +def _as_hook_targets(command_name: str) -> Iterable[str]: + for prefix, suffix in itertools.product( + ["override_", 
"execute_before_", "execute_after_"], + ["", "-arch", "-indep"], + ): + yield f"{prefix}{command_name}{suffix}" + + +def _diagnostics_debian_rules( + ls: "LanguageServer", + params: Union[DidOpenTextDocumentParams, DidChangeTextDocumentParams], +) -> None: + doc = ls.workspace.get_text_document(params.text_document.uri) + if not doc.path.endswith("debian/rules"): + return + lines = doc.lines + diagnostics = _lint_debian_rules( + doc.uri, + doc.path, + lines, + doc.position_codec, + ) + ls.publish_diagnostics( + doc.uri, + diagnostics, + ) + + +lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_CODE_ACTION) +lsp_standard_handler(_LANGUAGE_IDS, TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL) + + +@lint_diagnostics(_LANGUAGE_IDS) +def _lint_debian_rules_via_debputy_lsp( + doc_reference: str, + path: str, + lines: List[str], + position_codec: LintCapablePositionCodec, +) -> Optional[List[Diagnostic]]: + if not path.endswith("debian/rules"): + return None + return _lint_debian_rules( + doc_reference, + path, + lines, + position_codec, + ) + + +def _run_make_dryrun( + source_root: str, + lines: List[str], +) -> Optional[Diagnostic]: + try: + make_res = subprocess.run( + ["make", "--dry-run", "-f", "-", "debhelper-fail-me"], + input="".join(lines).encode("utf-8"), + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + cwd=source_root, + timeout=1, + ) + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + else: + if make_res.returncode != 0: + make_output = make_res.stderr.decode("utf-8") + m = _MAKE_ERROR_RE.match(make_output) + if m: + # We want it zero-based and make reports it one-based + line_of_error = int(m.group(1)) - 1 + msg = m.group(2).strip() + error_range = Range( + Position( + line_of_error, + 0, + ), + Position( + line_of_error + 1, + 0, + ), + ) + # No conversion needed; it is pure line numbers + return Diagnostic( + error_range, + f"make error: {msg}", + severity=DiagnosticSeverity.Error, + source="debputy (make)", + ) + return None + + +def 
iter_make_lines( + lines: List[str], + position_codec: LintCapablePositionCodec, + diagnostics: List[Diagnostic], +) -> Iterator[Tuple[int, str]]: + skip_next_line = False + is_extended_comment = False + for line_no, line in enumerate(lines): + skip_this = skip_next_line + skip_next_line = False + if line.rstrip().endswith("\\"): + skip_next_line = True + + if skip_this: + if is_extended_comment: + diagnostics.extend( + spellcheck_line(lines, position_codec, line_no, line) + ) + continue + + if line.startswith("#"): + diagnostics.extend(spellcheck_line(lines, position_codec, line_no, line)) + is_extended_comment = skip_next_line + continue + is_extended_comment = False + + if line.startswith("\t") or line.isspace(): + continue + + is_extended_comment = False + # We are not really dealing with extension lines at the moment (other than for spellchecking), + # since nothing needs it + yield line_no, line + + +def _lint_debian_rules( + _doc_reference: str, + path: str, + lines: List[str], + position_codec: LintCapablePositionCodec, +) -> Optional[List[Diagnostic]]: + source_root = os.path.dirname(os.path.dirname(path)) + if source_root == "": + source_root = "." 
+ diagnostics = [] + + make_error = _run_make_dryrun(source_root, lines) + if make_error is not None: + diagnostics.append(make_error) + all_dh_commands = _all_dh_commands(source_root) + if all_dh_commands: + all_hook_targets = {ht for c in all_dh_commands for ht in _as_hook_targets(c)} + all_hook_targets.update(_KNOWN_TARGETS) + source = "debputy (dh_assistant)" + else: + all_hook_targets = _KNOWN_TARGETS + source = "debputy" + + missing_targets = {} + + for line_no, line in iter_make_lines(lines, position_codec, diagnostics): + try: + colon_idx = line.index(":") + if len(line) > colon_idx + 1 and line[colon_idx + 1] == "=": + continue + except ValueError: + continue + target_substring = line[0:colon_idx] + if "=" in target_substring or "$(for" in target_substring: + continue + for i, m in enumerate(_WORDS_RE.finditer(target_substring)): + target = m.group(1) + if i == 0 and (target in _COMMAND_WORDS or target.startswith("(")): + break + if "%" in target or "$" in target: + continue + if target in all_hook_targets or target in missing_targets: + continue + pos, endpos = m.span(1) + hook_location = line_no, pos, endpos + missing_targets[target] = hook_location + + for target, (line_no, pos, endpos) in missing_targets.items(): + candidates = _detect_possible_typo(target, all_hook_targets) + if not candidates and not target.startswith( + ("override_", "execute_before_", "execute_after_") + ): + continue + + r_server_units = Range( + Position( + line_no, + pos, + ), + Position( + line_no, + endpos, + ), + ) + r = position_codec.range_to_client_units(lines, r_server_units) + if candidates: + msg = f"Target {target} looks like a typo of a known target" + else: + msg = f"Unknown rules dh hook target {target}" + if candidates: + fixes = [propose_correct_text_quick_fix(c) for c in candidates] + else: + fixes = [] + diagnostics.append( + Diagnostic( + r, + msg, + severity=DiagnosticSeverity.Warning, + data=fixes, + source=source, + ) + ) + return diagnostics + + +def 
_all_dh_commands(source_root: str) -> Optional[Sequence[str]]: + try: + output = subprocess.check_output( + ["dh_assistant", "list-commands", "--output-format=json"], + stderr=subprocess.DEVNULL, + cwd=source_root, + ) + except (FileNotFoundError, subprocess.CalledProcessError) as e: + _warn(f"dh_assistant failed (dir: {source_root}): {str(e)}") + return None + data = json.loads(output) + commands_raw = data.get("commands") if isinstance(data, dict) else None + if not isinstance(commands_raw, list): + return None + + commands = [] + + for command in commands_raw: + if not isinstance(command, dict): + return None + command_name = command.get("command") + if not command_name: + return None + commands.append(command_name) + + return commands + + +@lsp_completer(_LANGUAGE_IDS) +def _debian_rules_completions( + ls: "LanguageServer", + params: CompletionParams, +) -> Optional[Union[CompletionList, Sequence[CompletionItem]]]: + doc = ls.workspace.get_text_document(params.text_document.uri) + if not doc.path.endswith("debian/rules"): + return None + lines = doc.lines + server_position = doc.position_codec.position_from_client_units( + lines, params.position + ) + + line = lines[server_position.line] + line_start = line[0 : server_position.character] + + if _CONTAINS_TAB_OR_COLON.search(line_start): + return None + + source_root = os.path.dirname(os.path.dirname(doc.path)) + all_commands = _all_dh_commands(source_root) + items = [CompletionItem(ht) for c in all_commands for ht in _as_hook_targets(c)] + + return items diff --git a/src/debputy/lsp/lsp_dispatch.py b/src/debputy/lsp/lsp_dispatch.py new file mode 100644 index 0000000..41e9111 --- /dev/null +++ b/src/debputy/lsp/lsp_dispatch.py @@ -0,0 +1,131 @@ +import asyncio +from typing import Dict, Sequence, Union, Optional + +from lsprotocol.types import ( + DidOpenTextDocumentParams, + DidChangeTextDocumentParams, + TEXT_DOCUMENT_DID_CHANGE, + TEXT_DOCUMENT_DID_OPEN, + TEXT_DOCUMENT_COMPLETION, + CompletionList, + 
CompletionItem, + CompletionParams, + TEXT_DOCUMENT_HOVER, +) + +from debputy import __version__ +from debputy.lsp.lsp_features import ( + DIAGNOSTIC_HANDLERS, + COMPLETER_HANDLERS, + HOVER_HANDLERS, +) +from debputy.util import _info + +_DOCUMENT_VERSION_TABLE: Dict[str, int] = {} + +try: + from pygls.server import LanguageServer + + DEBPUTY_LANGUAGE_SERVER = LanguageServer("debputy", f"v{__version__}") +except ImportError: + + class Mock: + + def feature(self, *args, **kwargs): + return lambda x: x + + DEBPUTY_LANGUAGE_SERVER = Mock() + + +def is_doc_at_version(uri: str, version: int) -> bool: + dv = _DOCUMENT_VERSION_TABLE.get(uri) + return dv == version + + +@DEBPUTY_LANGUAGE_SERVER.feature(TEXT_DOCUMENT_DID_OPEN) +@DEBPUTY_LANGUAGE_SERVER.feature(TEXT_DOCUMENT_DID_CHANGE) +async def _open_or_changed_document( + ls: "LanguageServer", + params: Union[DidOpenTextDocumentParams, DidChangeTextDocumentParams], +) -> None: + version = params.text_document.version + doc_uri = params.text_document.uri + doc = ls.workspace.get_text_document(doc_uri) + + _DOCUMENT_VERSION_TABLE[doc_uri] = version + + handler = DIAGNOSTIC_HANDLERS.get(doc.language_id) + if handler is None: + _info( + f"Opened/Changed document: {doc.path} ({doc.language_id}) - no diagnostics handler" + ) + return + _info( + f"Opened/Changed document: {doc.path} ({doc.language_id}) - running diagnostics for doc version {version}" + ) + last_publish_count = -1 + + diagnostics_scanner = handler(ls, params) + async for diagnostics in diagnostics_scanner: + await asyncio.sleep(0) + if not is_doc_at_version(doc_uri, version): + # This basically happens with very edit, so lets not notify the client + # for that. 
+ _info( + f"Cancel (obsolete) diagnostics for doc version {version}: document version changed" + ) + break + if diagnostics is None or last_publish_count != len(diagnostics): + last_publish_count = len(diagnostics) if diagnostics is not None else 0 + ls.publish_diagnostics( + doc.uri, + diagnostics, + ) + + +@DEBPUTY_LANGUAGE_SERVER.feature(TEXT_DOCUMENT_COMPLETION) +def _completions( + ls: "LanguageServer", + params: CompletionParams, +) -> Optional[Union[CompletionList, Sequence[CompletionItem]]]: + doc_uri = params.text_document.uri + doc = ls.workspace.get_text_document(doc_uri) + + handler = COMPLETER_HANDLERS.get(doc.language_id) + if handler is None: + _info( + f"Complete request for document: {doc.path} ({doc.language_id}) - no handler" + ) + return + _info( + f"Complete request for document: {doc.path} ({doc.language_id}) - delegating to handler" + ) + + return handler( + ls, + params, + ) + + +@DEBPUTY_LANGUAGE_SERVER.feature(TEXT_DOCUMENT_HOVER) +def _hover( + ls: "LanguageServer", + params: CompletionParams, +) -> Optional[Union[CompletionList, Sequence[CompletionItem]]]: + doc_uri = params.text_document.uri + doc = ls.workspace.get_text_document(doc_uri) + + handler = HOVER_HANDLERS.get(doc.language_id) + if handler is None: + _info( + f"Hover request for document: {doc.path} ({doc.language_id}) - no handler" + ) + return + _info( + f"Hover request for document: {doc.path} ({doc.language_id}) - delegating to handler" + ) + + return handler( + ls, + params, + ) diff --git a/src/debputy/lsp/lsp_features.py b/src/debputy/lsp/lsp_features.py new file mode 100644 index 0000000..b417dd3 --- /dev/null +++ b/src/debputy/lsp/lsp_features.py @@ -0,0 +1,196 @@ +import collections +import inspect +from typing import Callable, TypeVar, Sequence, Union, Dict, List, Optional + +from lsprotocol.types import ( + TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL, + TEXT_DOCUMENT_CODE_ACTION, + DidChangeTextDocumentParams, + Diagnostic, + DidOpenTextDocumentParams, +) + +try: + from 
pygls.server import LanguageServer +except ImportError: + pass + +from debputy.linting.lint_util import LinterImpl +from debputy.lsp.quickfixes import provide_standard_quickfixes_from_diagnostics +from debputy.lsp.text_util import on_save_trim_end_of_line_whitespace + +C = TypeVar("C", bound=Callable) + + +DIAGNOSTIC_HANDLERS = {} +COMPLETER_HANDLERS = {} +HOVER_HANDLERS = {} +CODE_ACTION_HANDLERS = {} +WILL_SAVE_WAIT_UNTIL_HANDLERS = {} +_ALIAS_OF = {} + +_STANDARD_HANDLERS = { + TEXT_DOCUMENT_CODE_ACTION: ( + CODE_ACTION_HANDLERS, + provide_standard_quickfixes_from_diagnostics, + ), + TEXT_DOCUMENT_WILL_SAVE_WAIT_UNTIL: ( + WILL_SAVE_WAIT_UNTIL_HANDLERS, + on_save_trim_end_of_line_whitespace, + ), +} + + +def lint_diagnostics( + file_formats: Union[str, Sequence[str]] +) -> Callable[[LinterImpl], LinterImpl]: + + def _wrapper(func: C) -> C: + if not inspect.iscoroutinefunction(func): + + async def _lint_wrapper( + ls: "LanguageServer", + params: Union[ + DidOpenTextDocumentParams, + DidChangeTextDocumentParams, + ], + ) -> Optional[List[Diagnostic]]: + doc = ls.workspace.get_text_document(params.text_document.uri) + yield func( + doc.uri, + doc.path, + doc.lines, + doc.position_codec, + ) + + else: + raise ValueError("Linters are all non-async at the moment") + + for file_format in file_formats: + if file_format in DIAGNOSTIC_HANDLERS: + raise AssertionError( + "There is already a diagnostics handler for " + file_format + ) + DIAGNOSTIC_HANDLERS[file_format] = _lint_wrapper + + return func + + return _wrapper + + +def lsp_diagnostics(file_formats: Union[str, Sequence[str]]) -> Callable[[C], C]: + + def _wrapper(func: C) -> C: + + if not inspect.iscoroutinefunction(func): + + async def _linter(*args, **kwargs) -> None: + res = func(*args, **kwargs) + if inspect.isgenerator(res): + for r in res: + yield r + else: + yield res + + else: + + _linter = func + + _register_handler(file_formats, DIAGNOSTIC_HANDLERS, _linter) + + return func + + return _wrapper + + +def 
lsp_completer(file_formats: Union[str, Sequence[str]]) -> Callable[[C], C]: + return _registering_wrapper(file_formats, COMPLETER_HANDLERS) + + +def lsp_hover(file_formats: Union[str, Sequence[str]]) -> Callable[[C], C]: + return _registering_wrapper(file_formats, HOVER_HANDLERS) + + +def lsp_standard_handler(file_formats: Union[str, Sequence[str]], topic: str) -> None: + res = _STANDARD_HANDLERS.get(topic) + if res is None: + raise ValueError(f"No standard handler for {topic}") + + table, handler = res + + _register_handler(file_formats, table, handler) + + +def _registering_wrapper( + file_formats: Union[str, Sequence[str]], handler_dict: Dict[str, C] +) -> Callable[[C], C]: + def _wrapper(func: C) -> C: + _register_handler(file_formats, handler_dict, func) + return func + + return _wrapper + + +def _register_handler( + file_formats: Union[str, Sequence[str]], + handler_dict: Dict[str, C], + handler: C, +) -> None: + if isinstance(file_formats, str): + file_formats = [file_formats] + else: + if not file_formats: + raise ValueError("At least one language ID (file format) must be provided") + main = file_formats[0] + for alias in file_formats[1:]: + if alias not in _ALIAS_OF: + _ALIAS_OF[alias] = main + + for file_format in file_formats: + if file_format in handler_dict: + raise AssertionError(f"There is already a handler for {file_format}") + + handler_dict[file_format] = handler + + +def ensure_lsp_features_are_loaded() -> None: + # FIXME: This import is needed to force loading of the LSP files. But it only works + # for files with a linter (which currently happens to be all of them, but this is + # a bit fragile). 
+ from debputy.linting.lint_impl import LINTER_FORMATS + + assert LINTER_FORMATS + + +def describe_lsp_features() -> None: + + ensure_lsp_features_are_loaded() + + feature_list = [ + ("diagnostics (lint)", DIAGNOSTIC_HANDLERS), + ("code actions/quickfixes", CODE_ACTION_HANDLERS), + ("completion suggestions", COMPLETER_HANDLERS), + ("hover docs", HOVER_HANDLERS), + ("on-save handler", WILL_SAVE_WAIT_UNTIL_HANDLERS), + ] + print("LSP language IDs and their features:") + all_ids = sorted(set(lid for _, t in feature_list for lid in t)) + for lang_id in all_ids: + if lang_id in _ALIAS_OF: + continue + features = [n for n, t in feature_list if lang_id in t] + print(f" * {lang_id}:") + for feature in features: + print(f" - {feature}") + + aliases = collections.defaultdict(list) + for lang_id in all_ids: + main_lang = _ALIAS_OF.get(lang_id) + if main_lang is None: + continue + aliases[main_lang].append(lang_id) + + print() + print("Aliases:") + for main_id, aliases in aliases.items(): + print(f" * {main_id}: {', '.join(aliases)}") diff --git a/src/debputy/lsp/lsp_generic_deb822.py b/src/debputy/lsp/lsp_generic_deb822.py new file mode 100644 index 0000000..245f3de --- /dev/null +++ b/src/debputy/lsp/lsp_generic_deb822.py @@ -0,0 +1,221 @@ +import re +from typing import ( + Optional, + Union, + Sequence, + Tuple, + Set, + Any, + Container, + List, +) + +from lsprotocol.types import ( + CompletionParams, + CompletionList, + CompletionItem, + Position, + CompletionItemTag, + MarkupContent, + Hover, + MarkupKind, + HoverParams, +) + +from debputy.lsp.lsp_debian_control_reference_data import ( + Deb822FileMetadata, + Deb822KnownField, + StanzaMetadata, +) +from debputy.lsp.text_util import normalize_dctrl_field_name +from debputy.util import _info + +try: + from pygls.server import LanguageServer + from pygls.workspace import TextDocument +except ImportError: + pass + + +_CONTAINS_SPACE_OR_COLON = re.compile(r"[\s:]") + + +def _at_cursor( + doc: "TextDocument", + lines: 
List[str], + client_position: Position, +) -> Tuple[Optional[str], str, bool, int, Set[str]]: + paragraph_no = -1 + paragraph_started = False + seen_fields = set() + last_field_seen: Optional[str] = None + current_field: Optional[str] = None + server_position = doc.position_codec.position_from_client_units( + lines, + client_position, + ) + position_line_no = server_position.line + + line_at_position = lines[position_line_no] + line_start = "" + if server_position.character: + line_start = line_at_position[0 : server_position.character] + + for line_no, line in enumerate(lines): + if not line or line.isspace(): + if line_no == position_line_no: + current_field = last_field_seen + continue + last_field_seen = None + if line_no > position_line_no: + break + paragraph_started = False + elif line and line[0] == "#": + continue + elif line and not line[0].isspace() and ":" in line: + if not paragraph_started: + paragraph_started = True + seen_fields = set() + paragraph_no += 1 + key, _ = line.split(":", 1) + key_lc = key.lower() + last_field_seen = key_lc + if line_no == position_line_no: + current_field = key_lc + seen_fields.add(key_lc) + + in_value = bool(_CONTAINS_SPACE_OR_COLON.search(line_start)) + current_word = doc.word_at_position(client_position) + if current_field is not None: + current_field = normalize_dctrl_field_name(current_field) + return current_field, current_word, in_value, paragraph_no, seen_fields + + +def deb822_completer( + ls: "LanguageServer", + params: CompletionParams, + file_metadata: Deb822FileMetadata[Any], +) -> Optional[Union[CompletionList, Sequence[CompletionItem]]]: + doc = ls.workspace.get_text_document(params.text_document.uri) + lines = doc.lines + + current_field, _, in_value, paragraph_no, seen_fields = _at_cursor( + doc, + lines, + params.position, + ) + + stanza_metadata = file_metadata.guess_stanza_classification_by_idx(paragraph_no) + + if in_value: + _info(f"Completion for field value {current_field}") + if current_field is 
None: + return None + known_field = stanza_metadata.get(current_field) + if known_field is None: + return None + items = _complete_field_value(known_field) + else: + _info("Completing field name") + items = _complete_field_name( + stanza_metadata, + seen_fields, + ) + + _info(f"Completion candidates: {items}") + + return items + + +def deb822_hover( + ls: "LanguageServer", + params: HoverParams, + file_metadata: Deb822FileMetadata[Any], +) -> Optional[Hover]: + doc = ls.workspace.get_text_document(params.text_document.uri) + lines = doc.lines + current_field, word_at_position, in_value, paragraph_no, _ = _at_cursor( + doc, lines, params.position + ) + stanza_metadata = file_metadata.guess_stanza_classification_by_idx(paragraph_no) + + if current_field is None: + _info("No hover information as we cannot determine which field it is for") + return None + known_field = stanza_metadata.get(current_field) + + if known_field is None: + return None + if in_value: + if not known_field.known_values: + return + keyword = known_field.known_values.get(word_at_position) + if keyword is None: + return + hover_text = keyword.hover_text + else: + hover_text = known_field.hover_text + if hover_text is None: + hover_text = f"The field {current_field} had no documentation." 
+ + try: + supported_formats = ls.client_capabilities.text_document.hover.content_format + except AttributeError: + supported_formats = [] + + _info(f"Supported formats {supported_formats}") + markup_kind = MarkupKind.Markdown + if markup_kind not in supported_formats: + markup_kind = MarkupKind.PlainText + return Hover( + contents=MarkupContent( + kind=markup_kind, + value=hover_text, + ) + ) + + +def _should_complete_field_with_value(cand: Deb822KnownField) -> bool: + return cand.known_values is not None and ( + len(cand.known_values) == 1 + or ( + len(cand.known_values) == 2 + and cand.warn_if_default + and cand.default_value is not None + ) + ) + + +def _complete_field_name( + fields: StanzaMetadata[Any], + seen_fields: Container[str], +) -> Optional[Union[CompletionList, Sequence[CompletionItem]]]: + items = [] + for cand_key, cand in fields.items(): + if cand_key.lower() in seen_fields: + continue + name = cand.name + complete_as = name + ": " + if _should_complete_field_with_value(cand): + value = next(iter(v for v in cand.known_values if v != cand.default_value)) + complete_as += value + tags = [] + if cand.replaced_by or cand.deprecated_with_no_replacement: + tags.append(CompletionItemTag.Deprecated) + + items.append( + CompletionItem( + name, + insert_text=complete_as, + tags=tags, + ) + ) + return items + + +def _complete_field_value( + field: Deb822KnownField, +) -> Optional[Union[CompletionList, Sequence[CompletionItem]]]: + if field.known_values is None: + return None + return [CompletionItem(v) for v in field.known_values] diff --git a/src/debputy/lsp/quickfixes.py b/src/debputy/lsp/quickfixes.py new file mode 100644 index 0000000..d911961 --- /dev/null +++ b/src/debputy/lsp/quickfixes.py @@ -0,0 +1,202 @@ +from typing import ( + Literal, + TypedDict, + Callable, + Iterable, + Union, + TypeVar, + Mapping, + Dict, + Optional, + List, + cast, +) + +from lsprotocol.types import ( + CodeAction, + Command, + CodeActionParams, + Diagnostic, + 
CodeActionDisabledType, + TextEdit, + WorkspaceEdit, + TextDocumentEdit, + OptionalVersionedTextDocumentIdentifier, + Range, + Position, + CodeActionKind, +) + +from debputy.util import _warn + +try: + from debian._deb822_repro.locatable import Position as TEPosition, Range as TERange + + from pygls.server import LanguageServer + from pygls.workspace import TextDocument +except ImportError: + pass + + +CodeActionName = Literal["correct-text", "remove-line"] + + +class CorrectTextCodeAction(TypedDict): + code_action: Literal["correct-text"] + correct_value: str + + +class RemoveLineCodeAction(TypedDict): + code_action: Literal["remove-line"] + + +def propose_correct_text_quick_fix(correct_value: str) -> CorrectTextCodeAction: + return { + "code_action": "correct-text", + "correct_value": correct_value, + } + + +def propose_remove_line_quick_fix() -> RemoveLineCodeAction: + return { + "code_action": "remove-line", + } + + +CODE_ACTION_HANDLERS: Dict[ + CodeActionName, + Callable[ + [Mapping[str, str], CodeActionParams, Diagnostic], + Iterable[Union[CodeAction, Command]], + ], +] = {} +M = TypeVar("M", bound=Mapping[str, str]) +Handler = Callable[ + [M, CodeActionParams, Diagnostic], + Iterable[Union[CodeAction, Command]], +] + + +def _code_handler_for(action_name: CodeActionName) -> Callable[[Handler], Handler]: + def _wrapper(func: Handler) -> Handler: + assert action_name not in CODE_ACTION_HANDLERS + CODE_ACTION_HANDLERS[action_name] = func + return func + + return _wrapper + + +@_code_handler_for("correct-text") +def _correct_value_code_action( + code_action_data: CorrectTextCodeAction, + code_action_params: CodeActionParams, + diagnostic: Diagnostic, +) -> Iterable[Union[CodeAction, Command]]: + corrected_value = code_action_data["correct_value"] + edits = [ + TextEdit( + diagnostic.range, + corrected_value, + ), + ] + yield CodeAction( + title=f'Replace with "{corrected_value}"', + kind=CodeActionKind.QuickFix, + diagnostics=[diagnostic], + edit=WorkspaceEdit( 
@_code_handler_for("remove-line")
def _correct_value_code_action(
    _code_action_data: RemoveLineCodeAction,
    code_action_params: CodeActionParams,
    diagnostic: Diagnostic,
) -> Iterable[Union[CodeAction, Command]]:
    """Offer a quick fix that deletes the entire line the code action was requested for.

    The generated edit removes everything from column 0 of the start line up
    to column 0 of the following line (i.e. the line including its newline).

    :param _code_action_data: The "remove-line" payload attached to the
      diagnostic. It carries no parameters and is therefore unused.
    :param code_action_params: The code action request; its range decides
      which line is removed and its text document URI which file is edited.
    :param diagnostic: The diagnostic that the quick fix resolves.
    :return: Yields a single `CodeAction`, or nothing when the range is not
      safe for this fix.
    """
    # NOTE(review): the line is derived from the *request* range rather than
    # from `diagnostic.range` - confirm that this is the intended source.
    start = code_action_params.range.start
    # The edit below blindly removes exactly one line. Refuse to do that for
    # ranges that `range_compatible_with_remove_line_fix` rejects (previously
    # this check was inverted, which disabled the fix for all valid ranges).
    if not range_compatible_with_remove_line_fix(code_action_params.range):
        _warn(
            "Bug: the quick was used for a diagnostic that spanned multiple lines and would corrupt the file."
        )
        return

    edits = [
        TextEdit(
            Range(
                start=Position(
                    line=start.line,
                    character=0,
                ),
                end=Position(
                    line=start.line + 1,
                    character=0,
                ),
            ),
            "",
        ),
    ]
    yield CodeAction(
        title="Remove the line",
        kind=CodeActionKind.QuickFix,
        diagnostics=[diagnostic],
        edit=WorkspaceEdit(
            # The same edits are provided in both forms supported by
            # WorkspaceEdit (`changes` and `document_changes`).
            changes={code_action_params.text_document.uri: edits},
            document_changes=[
                TextDocumentEdit(
                    text_document=OptionalVersionedTextDocumentIdentifier(
                        uri=code_action_params.text_document.uri,
                    ),
                    edits=edits,
                )
            ],
        ),
    )
CODE_ACTION_HANDLERS.get(action_name) + if handler is not None: + actions.extend( + handler( + cast("Mapping[str, str]", action_suggestion), + code_action_params, + diagnostic, + ) + ) + else: + _warn(f"No codeAction handler for {action_name} !?") + if not actions: + return None + return actions diff --git a/src/debputy/lsp/spellchecking.py b/src/debputy/lsp/spellchecking.py new file mode 100644 index 0000000..69dd119 --- /dev/null +++ b/src/debputy/lsp/spellchecking.py @@ -0,0 +1,304 @@ +import functools +import itertools +import os +import re +import subprocess +from typing import Iterable, FrozenSet, Tuple, Optional, List + +from debian.debian_support import Release +from lsprotocol.types import Diagnostic, Range, Position, DiagnosticSeverity + +from debputy.lsp.quickfixes import propose_correct_text_quick_fix +from debputy.lsp.text_util import LintCapablePositionCodec +from debputy.util import _info, _warn + +_SPELL_CHECKER_DICT = "/usr/share/hunspell/en_US.dic" +_SPELL_CHECKER_AFF = "/usr/share/hunspell/en_US.aff" +_WORD_PARTS = re.compile(r"(\S+)") +_PRUNE_SYMBOLS_RE = re.compile(r"(\w+(?:-\w+|'\w+)?)") +_FIND_QUOTE_CHAR = re.compile(r'["`]') +_LOOKS_LIKE_FILENAME = re.compile( + r""" + [.]{0,3}/[a-z0-9]+(/[a-z0-9]+)+/* + | [a-z0-9-_]+(/[a-z0-9]+)+/* + | [a-z0-9_]+(/[a-z0-9_]+){2,}/* + | (?:\S+)?[.][a-z]{1,3} + +""", + re.VERBOSE, +) +_LOOKS_LIKE_PROGRAMMING_TERM = re.compile( + r""" + ( + # Java identifier Camel Case + [a-z][a-z0-9]*(?:[A-Z]{1,3}[a-z0-9]+)+ + # Type name Camel Case + | [A-Z]{1,3}[a-z0-9]+(?:[A-Z]{1,3}[a-z0-9]+)+ + # Type name Camel Case with underscore (seen in Dh_Lib.pm among other + | [A-Z]{1,3}[a-z0-9]+(?:_[A-Z]{1,3}[a-z0-9]+)+ + # Perl module + | [A-Z]{1,3}[a-z0-9]+(?:_[A-Z]{1,3}[a-z0-9]+)*(::[A-Z]{1,3}[a-z0-9]+(?:_[A-Z]{1,3}[a-z0-9]+)*)+ + # Probably an abbreviation + | [A-Z]{3,} + # Perl/Python identifiers or Jinja templates + | [$%&@_]?[{]?[{]?[a-z][a-z0-9]*(?:_[a-z0-9]+)+(?:(?:->)?[\[{]\S+|}}?)? 
+ # SCREAMING_SNAKE_CASE (environment variables plus -DVAR=B or $FOO) + | [-$%&*_]{0,2}[A-Z][A-Z0-9]*(_[A-Z0-9]+)+(?:=\S+)? + | \#[A-Z][A-Z0-9]*(_[A-Z0-9]+)+\# + # Subcommand names. Require at least two "-" to avoid skipping hypenated words + | [a-z][a-z0-9]*(-[a-z0-9]+){2,} + # Short args + | -[a-z0-9]+ + # Things like 32bit + | \d{2,}-?[a-z]+ + # Source package (we do not have a package without prefix/suffix because it covers 95% of all lowercase words) + | src:[a-z0-9][-+.a-z0-9]+ + | [a-z0-9][-+.a-z0-9]+:(?:any|native) + # Version + | v\d+(?:[.]\S+)? + # chmod symbolic mode or math + | \S*=\S+ + ) +""", + re.VERBOSE, +) +_LOOKS_LIKE_EMAIL = re.compile( + r""" + <[^>@\s]+@[^>@\s]+> +""", + re.VERBOSE, +) +_NO_CORRECTIONS = tuple() +_WORDLISTS = [ + "debian-wordlist.dic", +] +_NAMELISTS = [ + "logins-and-people.dic", +] +_PERSONAL_DICTS = [ + "${HOME}/.hunspell_default", + "${HOME}/.hunspell_en_US", +] + + +try: + if not os.path.lexists(_SPELL_CHECKER_DICT) or not os.path.lexists( + _SPELL_CHECKER_AFF + ): + raise ImportError + from hunspell import HunSpell + + _HAS_HUNSPELL = True +except ImportError: + _HAS_HUNSPELL = False + + +def _read_wordlist( + base_dir: str, wordlist_name: str, *, namelist: bool = False +) -> Iterable[str]: + with open(os.path.join(base_dir, wordlist_name)) as fd: + w = [w.strip() for w in fd] + yield from w + if namelist: + yield from (f"{n}'s" for n in w) + + +def _all_debian_archs() -> Iterable[str]: + try: + output = subprocess.check_output(["dpkg-architecture", "-L"]) + except (FileNotFoundError, subprocess.CalledProcessError) as e: + _warn(f"dpkg-architecture -L failed: {e}") + return tuple() + + return (x.strip() for x in output.decode("utf-8").splitlines()) + + +@functools.lru_cache +def _builtin_exception_words() -> FrozenSet[str]: + basedirs = os.path.dirname(__file__) + release_names = (x for x in Release.releases) + return frozenset( + itertools.chain( + itertools.chain.from_iterable( + _read_wordlist(basedirs, wl) for wl in 
def default_spellchecker() -> "Spellchecker":
    """Return the module-wide spell checker, creating it on first use.

    The instance is stored in `_DEFAULT_SPELL_CHECKER`. A hunspell backed
    checker is created when `_HAS_HUNSPELL` is set; otherwise the shared
    do-nothing checker is used.
    """
    global _DEFAULT_SPELL_CHECKER
    if _DEFAULT_SPELL_CHECKER is not None:
        return _DEFAULT_SPELL_CHECKER

    checker = HunspellSpellchecker() if _HAS_HUNSPELL else _do_nothing_spellchecker()
    _DEFAULT_SPELL_CHECKER = checker
    return checker
def _split_line_to_words(line: str) -> Iterable[Tuple[str, int, int]]:
    """Split a line into the words that should be spell checked.

    Quoted parts of the line are left out (via `_skip_quoted_parts`). The
    remaining whitespace separated tokens are dropped entirely when they
    start with "--" or match the programming term, filename or email
    patterns. Surviving tokens are reduced to their word-like pieces.

    :param line: The line to split.
    :return: Tuples of (word, start, end) where start/end are offsets into
      `line`.
    """
    ignored_token_patterns = (
        _LOOKS_LIKE_PROGRAMMING_TERM,
        _LOOKS_LIKE_FILENAME,
        _LOOKS_LIKE_EMAIL,
    )
    for segment, segment_start in _skip_quoted_parts(line):
        for token_match in _WORD_PARTS.finditer(segment):
            token = token_match.group(1)
            # CLI arg
            if token.startswith("--"):
                continue
            if any(pattern.match(token) for pattern in ignored_token_patterns):
                continue
            # Offset of the token relative to the start of the full line
            token_start = segment_start + token_match.span(1)[0]
            for word_match in _PRUNE_SYMBOLS_RE.finditer(token):
                begin, end = word_match.span(1)
                yield word_match.group(1), begin + token_start, end + token_start
class HunspellSpellchecker(Spellchecker):
    """Spell checker backed by hunspell.

    On creation, the built-in exception words and any personal dictionaries
    listed in `_PERSONAL_DICTS` are loaded on top of the main dictionary.
    """

    def __init__(self) -> None:
        self._checker = HunSpell(_SPELL_CHECKER_DICT, _SPELL_CHECKER_AFF)
        # The lookup cache is created per instance. Decorating the method
        # itself would make the cache global, keyed on (and holding on to)
        # `self`, and awkward to invalidate for a single checker.
        self._lookup = functools.lru_cache(128)(self._lookup_uncached)
        for w in _builtin_exception_words():
            self._checker.add(w)
        self._load_personal_exclusions()

    def provide_corrections_for(self, word: str) -> Iterable[str]:
        """Return suggested corrections for `word`; empty if it is accepted.

        Words with well-known tool/variable prefixes and a few deliberate
        suffixes are accepted without consulting the dictionary.
        """
        if word.startswith(
            (
                "dpkg-",
                "dh-",
                "dh_",
                "debian-",
                "debconf-",
                "update-",
                "DEB_",
                "DPKG_",
            )
        ):
            return _NO_CORRECTIONS
        # 'ing is deliberately forcing a word into another word-class
        if word.endswith(("'ing", "-nss")):
            return _NO_CORRECTIONS
        return self._lookup(word)

    def _lookup_uncached(self, word: str) -> Iterable[str]:
        """Ask hunspell about `word` (uncached; use `self._lookup` instead)"""
        if self._checker.spell(word):
            return _NO_CORRECTIONS
        # NOTE(review): the returned value is cached and shared between
        # callers; presumably nobody mutates it - confirm.
        return self._checker.suggest(word)

    def ignore_word(self, word: str) -> None:
        """Accept `word` as correctly spelled from now on"""
        self._checker.add(word)
        # Results cached before the word was added are now stale (both for
        # the word itself and possibly for suggestions of other words).
        self._lookup.cache_clear()

    def _load_personal_exclusions(self) -> None:
        """Load the personal dictionaries from `_PERSONAL_DICTS` that exist

        A leading `${VAR}` in an entry is replaced by the value of that
        environment variable; the entry is skipped if the variable is unset.
        """
        for filename in _PERSONAL_DICTS:
            if filename.startswith("${"):
                end_index = filename.index("}")
                varname = filename[2:end_index]
                value = os.environ.get(varname)
                if value is None:
                    continue
                filename = value + filename[end_index + 1 :]
            if os.path.isfile(filename):
                _info(f"Loading personal spelling dictionary from {filename}")
                self._checker.add_dic(filename)
def merge_sort_text_edits(text_edits: List[TextEdit]) -> List[TextEdit]:
    """Sort text edits by their start position (line, then character).

    The sort is stable: edits starting at the same position keep their
    relative order.

    :param text_edits: The edits to sort. The list is sorted in place.
    :return: The same list object that was passed in, now sorted.
    """
    # `list.sort` is a stable, in-place sort, so it provides the exact
    # guarantees of the former hand-written merge sort. The key mirrors the
    # ordering defined by `compare_text_edits`.
    text_edits.sort(key=lambda e: (e.range.start.line, e.range.start.character))
    return text_edits
def offset_at_position(lines: List[str], server_position: Position) -> int:
    """Convert a line/character position into an offset into the text.

    The offset is the combined length of all lines before the position's
    line plus the position's character index.

    :param lines: The lines of the document.
      NOTE(review): presumably each line retains its line terminator, as
      the lengths are summed as-is - confirm against the callers.
    :param server_position: The position to convert.
    :return: The offset of the position.
    """
    offset = server_position.character
    for preceding_line in lines[: server_position.line]:
        offset += len(preceding_line)
    return offset
def normalize_dctrl_field_name(f: str) -> str:
    """Remove a leading `X[BCS]*-` style prefix from a field name.

    A name is only changed when it starts with "x" or "X", followed by zero
    or more of the letters B, C and S (in any case) and then a "-". In that
    case, everything after that first "-" is returned.

    >>> normalize_dctrl_field_name("XB-Foo")
    'Foo'
    >>> normalize_dctrl_field_name("X-Foo-Bar")
    'Foo-Bar'
    >>> normalize_dctrl_field_name("Xen-Foo")
    'Xen-Foo'
    >>> normalize_dctrl_field_name("XB")
    'XB'

    :param f: The field name to normalize.
    :return: The field name without the prefix, or `f` unchanged when it
      does not carry such a prefix.
    """
    if not f.startswith(("x", "X")):
        return f
    prefix, dash, remainder = f.partition("-")
    if not dash:
        # Without a "-" there is no prefix to strip (e.g. "X" or "XB").
        # Previously, these cases triggered an assertion error or returned
        # only the last character of the name.
        return f
    if any(c not in "bBsScC" for c in prefix[1:]):
        return f
    return remainder
b/src/debputy/lsp/vendoring/_deb822_repro/__init__.py @@ -0,0 +1,191 @@ +# The "from X import Y as Y" looks weird, but we are stuck in a fight +# between mypy and pylint in the CI. +# +# mypy --strict insists on either of following for re-exporting +# 1) Do a "from debian._deb822_repro.X import *" +# 2) Do a "from .X import Y" +# 3) Do a "from debian._deb822_repro.X import Y as Z" +# +# pylint on the CI fails on relative imports (it assumes "lib" is a +# part of the python package name in relative imports). This rules +# out 2) from the mypy list. The use of 1) would cause overlapping +# imports (and also it felt prudent to import only what was exported). +# +# This left 3) as the only option for now, which pylint then complains +# about (not unreasonably in general). Fortunately, we can disable +# that warning in this work around. But once 2) becomes an option +# without pylint tripping over itself on the CI, then it is considerably +# better than this approach. +# + +""" Round-trip safe dictionary-like interfaces to RFC822-like files + +This module is a round-trip safe API for working with RFC822-like Debian data +formats. It is primarily aimed at files managed by humans, like debian/control. +While it is able to process any Deb822 file, you might find the debian.deb822 +module better suited for larger files such as the `Packages` and `Sources` +from the Debian archive due to reasons explained below. + +Being round-trip safe means that this module will faithfully preserve the original +formatting including whitespace and comments from the input where not modified. +A concrete example:: + + >>> from debian._deb822_repro import parse_deb822_file + >>> example_deb822_paragraph = ''' + ... Package: foo + ... # Field comment (because it becomes just before a field) + ... Section: main/devel + ... Depends: libfoo, + ... # Inline comment (associated with the next line) + ... libbar, + ...
''' + >>> deb822_file = parse_deb822_file(example_deb822_paragraph.splitlines()) + >>> paragraph = next(iter(deb822_file)) + >>> paragraph['Section'] = 'devel' + >>> output = deb822_file.dump() + >>> output == example_deb822_paragraph.replace('Section: main/devel', 'Section: devel') + True + +This makes it particularly good for automated changes/corrections to files (partly) +maintained by humans. + +Compared to debian.deb822 +------------------------- + +The round-trip safe API is primarily useful when your program is editing files +and the file in question is (likely) to be hand-edited or formatted directly by +human maintainers. This includes files like debian/control and the +debian/copyright using the "DEP-5" format. + +The round-trip safe API also supports parsing and working with invalid files. +This enables programs to work on the file in cases where the file was left +with an error in an attempt to correct it (or ignore it). + +On the flip side, the debian.deb822 module generally uses less memory than the +round trip safe API. In some cases, it will also have faster data structures +because its internal data structures are simpler. Accordingly, when you are doing +read-only work and/or working with large files a la the Packages or Sources +files from the Debian archive, then the round-trip safe API either provides no +advantages or its trade-offs might show up in performance statistics. + +The memory and runtime performance difference should generally be constant for +valid files but not necessarily a small one. For invalid files, some operations +can degrade in runtime performance in particular cases (memory performance for +invalid files is comparable to that of valid files). + +Converting from debian.deb822 +============================= + +The following is a short example for how to migrate from debian.deb822 to +the round-trip safe API. Given the following source text:: + + >>> dctrl_input = b''' + ... Source: foo + ...
Build-Depends: debhelper-compat (= 13) + ... + ... Package: bar + ... Architecture: any + ... Depends: ${misc:Depends}, + ... ${shlibs:Depends}, + ... Description: provides some exciting feature + ... yada yada yada + ... . + ... more deskription with a misspelling + ... '''.lstrip() # To remove the leading newline + >>> # A few definitions to emulate file I/O (would be different in the program) + >>> import contextlib, os + >>> @contextlib.contextmanager + ... def open_input(): + ... # Works with and without keepends=True. + ... # Keep the ends here to truly emulate an open file. + ... yield dctrl_input.splitlines(keepends=True) + >>> def open_output(): + ... return open(os.devnull, 'wb') + +With debian.deb822, your code might look like this:: + + >>> from debian.deb822 import Deb822 + >>> with open_input() as in_fd, open_output() as out_fd: + ... for paragraph in Deb822.iter_paragraphs(in_fd): + ... if 'Description' not in paragraph: + ... continue + ... description = paragraph['Description'] + ... # Fix typo + ... paragraph['Description'] = description.replace('deskription', 'description') + ... paragraph.dump(out_fd) + +With the round-trip safe API, the rewrite would look like this:: + + >>> from debian._deb822_repro import parse_deb822_file + >>> with open_input() as in_fd, open_output() as out_fd: + ... parsed_file = parse_deb822_file(in_fd) + ... for paragraph in parsed_file: + ... if 'Description' not in paragraph: + ... continue + ... description = paragraph['Description'] + ... # Fix typo + ... paragraph['Description'] = description.replace('deskription', 'description') + ... parsed_file.dump(out_fd) + +Key changes are: + + 1. Imports are different. + 2. Deb822.iter_paragraphs is replaced by parse_deb822_file and a reference to + its return value is kept for later. + 3. Instead of dumping paragraphs one by one, the return value from + parse_deb822_file is dumped at the end. 
+ + - The round-trip safe API does support "per-paragraph" but formatting + and comments between paragraphs would be lost in the output. This may + be an acceptable tradeoff or desired for some cases. + +Note that the round trip safe API does not accept all the same parameters as the +debian.deb822 module does. Often this is because the feature is not relevant for +the round-trip safe API (e.g., python-apt cannot be used as it discards comments) +or is obsolete in the debian.deb822 module and therefore omitted. + +For list based fields, you may want to have a look at the +Deb822ParagraphElement.as_interpreted_dict_view method. + +Stability of this API +--------------------- + +The API is subject to change based on feedback from early adopters and beta +testers. That said, the code for valid files is unlikely to change in +a backwards incompatible way. + +Things that might change in an incompatible way include: + * Whether invalid files are accepted (parsed without errors) by default. + (currently they are) + * How invalid files are parsed. As an example, currently a syntax error acts + as a paragraph separator. Whether it should is open to debate.
+ +""" + +# pylint: disable=useless-import-alias +from .parsing import ( + parse_deb822_file as parse_deb822_file, + LIST_SPACE_SEPARATED_INTERPRETATION as LIST_SPACE_SEPARATED_INTERPRETATION, + LIST_COMMA_SEPARATED_INTERPRETATION as LIST_COMMA_SEPARATED_INTERPRETATION, + Interpretation as Interpretation, + # Primarily for documentation purposes / help() + Deb822FileElement as Deb822FileElement, + Deb822NoDuplicateFieldsParagraphElement, + Deb822ParagraphElement as Deb822ParagraphElement, +) +from .types import ( + AmbiguousDeb822FieldKeyError as AmbiguousDeb822FieldKeyError, + SyntaxOrParseError, +) + +__all__ = [ + "parse_deb822_file", + "AmbiguousDeb822FieldKeyError", + "LIST_SPACE_SEPARATED_INTERPRETATION", + "LIST_COMMA_SEPARATED_INTERPRETATION", + "Interpretation", + "Deb822FileElement", + "Deb822NoDuplicateFieldsParagraphElement", + "Deb822ParagraphElement", + "SyntaxOrParseError", +] diff --git a/src/debputy/lsp/vendoring/_deb822_repro/_util.py b/src/debputy/lsp/vendoring/_deb822_repro/_util.py new file mode 100644 index 0000000..a79426d --- /dev/null +++ b/src/debputy/lsp/vendoring/_deb822_repro/_util.py @@ -0,0 +1,291 @@ +import collections +import collections.abc +import logging +import sys +import textwrap +from abc import ABC + +try: + from typing import ( + Optional, + Union, + Iterable, + Callable, + TYPE_CHECKING, + Iterator, + Type, + cast, + List, + Generic, + ) + from debian._util import T + from .types import TE, R, TokenOrElement + + _combine_parts_ret_type = Callable[ + [Iterable[Union[TokenOrElement, TE]]], Iterable[Union[TokenOrElement, R]] + ] +except ImportError: + # pylint: disable=unnecessary-lambda-assignment + TYPE_CHECKING = False + cast = lambda t, v: v + + +if TYPE_CHECKING: + from .parsing import Deb822Element + from .tokens import Deb822Token + + +def print_ast( + ast_tree, # type: Union[Iterable[TokenOrElement], 'Deb822Element'] + *, + end_marker_after=5, # type: Optional[int] + output_function=None # type: 
def combine_into_replacement(
    source_class,  # type: Type[TE]
    replacement_class,  # type: Type[R]
    *,
    constructor=None  # type: Optional[Callable[[List[TE]], R]]
):
    # type: (...) -> _combine_parts_ret_type[TE, R]
    """Create a stream filter that merges runs of one type into another type

    The returned callable takes an iterable and yields its entries in order,
    except that every run of consecutive `source_class` instances is replaced
    by a single value built from the list of those instances.

    :param source_class: The type of the entries that should be combined.
    :param replacement_class: Called with the list of combined entries to
      create the replacement, unless `constructor` is given.
    :param constructor: Optional callable used instead of `replacement_class`
      for creating the replacement.
    """
    # The cast is only for the benefit of mypy; it has no effect at runtime.
    build = (
        cast("Callable[[List[TE]], R]", replacement_class)
        if constructor is None
        else constructor
    )

    def _impl(token_stream):
        # type: (Iterable[Union[TokenOrElement, TE]]) -> Iterable[Union[TokenOrElement, R]]
        pending = []
        for entry in token_stream:
            if not isinstance(entry, source_class):
                # The current run (if any) has ended; emit it before the
                # entry that terminated it.
                if pending:
                    yield build(list(pending))
                    pending.clear()
                yield entry
            else:
                pending.append(entry)

        # A run at the very end of the stream
        if pending:
            yield build(pending)

    return _impl
def len_check_iterator(
    content,  # type: str
    stream,  # type: Iterable[TE]
    content_len=None,  # type: Optional[int]
):
    # type: (...) -> Iterable[TE]
    """Pass through a parser's output while verifying it covers the entire line/text

    Every entry from the stream is yielded unchanged. Once the stream is
    depleted, the combined length of the text of all tokens seen (including
    the tokens inside elements) is compared with the expected length.

    :param content: The text that was parsed. Used for the default length and
      in the error messages.
    :param stream: The tokens and/or elements produced by the parser.
    :param content_len: The expected length of the parsed text. Defaults to
      the length of `content`.
    :raises ValueError: If the tokens cover less or more text than expected.
      As this is a generator, the error appears when the stream is depleted.
    """
    if content_len is None:
        content_len = len(content)
    # Fail-safe to ensure none of the value parsers incorrectly parse a value.
    covered = 0
    for token_or_element in stream:
        # We use the AttributeError to discriminate between elements and tokens
        # The cast()s are here to assist / workaround mypy not realizing that.
        try:
            tokens = cast("Deb822Element", token_or_element).iter_tokens()
        except AttributeError:
            token = cast("Deb822Token", token_or_element)
            covered += len(token.text)
        else:
            covered += sum(len(token.text) for token in tokens)
        yield token_or_element
    if covered == content_len:
        return
    if covered < content_len:
        # The template references {content}; it used to be passed as "line",
        # which made the formatting itself fail with a KeyError.
        msg = textwrap.dedent(
            """\
        Value parser did not fully cover the entire line with tokens (
        missing range {covered}..{content_len}). Occurred when parsing "{content}"
        """
        ).format(covered=covered, content_len=content_len, content=content)
        raise ValueError(msg)
    msg = textwrap.dedent(
        """\
                Value parser emitted tokens for more text than was present? Should have
                emitted {content_len} characters, got {covered}. Occurred when parsing
                "{content}"
                """
    ).format(covered=covered, content_len=content_len, content=content)
    raise ValueError(msg)
_CONTENT_TYPE_VALUE = "is_value"
_CONTENT_TYPE_COMMENT = "is_comment"
_CONTENT_TYPE_SEPARATOR = "is_separator"

try:
    from typing import Iterator, Union, Literal
    from .types import TokenOrElement, FormatterCallback
except ImportError:
    pass


class FormatterContentToken(object):
    """Typed, tagged text for use with the formatting API

    Instances pair a piece of text with a tag (value, comment or separator).
    The formatting API hands these to the formatter callback, so the callback
    knows what kind of text it has been asked to format.
    """

    __slots__ = ("_text", "_content_type")

    def __init__(self, text, content_type):
        # type: (str, object) -> None
        self._text = text
        self._content_type = content_type

    @classmethod
    def from_token_or_element(cls, token_or_element):
        # type: (TokenOrElement) -> FormatterContentToken
        """Convert a parsed token or element into a formatter token

        Comment tokens become comment formatter tokens. Any other token, and
        any element (via its `convert_to_text()`), becomes a value token.

        :raises ValueError: If given a whitespace token.
        """
        if not isinstance(token_or_element, Deb822Token):
            # Elements are assumed to be content (this is specialized for the
            # interpretations where comments are always tokens).
            return cls.value_token(token_or_element.convert_to_text())
        parsed_token = token_or_element
        if parsed_token.is_comment:
            return cls.comment_token(parsed_token.text)
        if parsed_token.is_whitespace:
            raise ValueError("FormatterContentType cannot be whitespace")
        return cls.value_token(parsed_token.text)

    @classmethod
    def separator_token(cls, text):
        # type: (str) -> FormatterContentToken
        """Provide a separator token with the given text

        The two common separators (a single space and a comma) are served
        from shared module-level instances as a minor memory optimization.
        """
        if text == ",":
            return COMMA_SEPARATOR_FT
        if text == " ":
            return SPACE_SEPARATOR_FT
        return cls(text, _CONTENT_TYPE_SEPARATOR)

    @classmethod
    def comment_token(cls, text):
        # type: (str) -> FormatterContentToken
        """Generates a single comment token with the provided text

        Mostly useful for creating test cases
        """
        return cls(text, _CONTENT_TYPE_COMMENT)

    @classmethod
    def value_token(cls, text):
        # type: (str) -> FormatterContentToken
        """Generates a single value token with the provided text

        Mostly useful for creating test cases
        """
        return cls(text, _CONTENT_TYPE_VALUE)

    @property
    def is_comment(self):
        # type: () -> bool
        """True if this formatter token represents a comment

        Use this property to decide whether the token is a comment. Checking
        whether the text starts with a "#" is insufficient, because a value
        *can* start with "#" as well. Whether the text is a comment or a value
        depends on context (it is a comment if and only if the "#" was at the
        start of a line), and the formatter often does not have that context.

        The formatter *should* preserve the order of comments and interleave
        them between the value tokens in the same order as it sees them.
        Failing to do so can cause confusing comments (such as associating
        a comment with a different value than it was written for).

        The formatter *may* discard comment tokens if it does not want to
        preserve them. They are then omitted from the output, which may be
        acceptable in some cases and is a lot better than re-ordering
        comments.

        Formatters must be aware of the following special cases for comments:
         * Comments *MUST* be emitted after a newline. If the very first token
           is a comment, the formatter is expected to emit a newline before it
           as well (Fields cannot start immediately on a comment).
        """
        return _CONTENT_TYPE_COMMENT is self._content_type

    @property
    def is_value(self):
        # type: () -> bool
        """True if this formatter token represents a semantic value

        The formatter *MUST* preserve values as-is in its output. It may
        "unpack" the value from the token (as in, return it as a part of a
        plain str), but the value content must not be changed nor re-ordered
        relative to other value tokens (as that could change the meaning of
        the field).
        """
        return _CONTENT_TYPE_VALUE is self._content_type

    @property
    def is_separator(self):
        # type: () -> bool
        """True if this formatter token represents a separator token

        The formatter is not required to preserve the provided separators,
        but it is required to properly separate values. It is often a lot
        easier to discard the existing separator tokens. As an example, in a
        whitespace separated list of values, space, tab and newline all count
        as separators. Formatting-wise, however, there is a world of
        difference between a space, a tab and a newline. In particular, a
        newline must be followed by an additional space or tab (to act as
        a value continuation line) if a value follows it (otherwise, the
        generated output is invalid).
        """
        return _CONTENT_TYPE_SEPARATOR is self._content_type

    @property
    def is_whitespace(self):
        # type: () -> bool
        """True if this formatter token represents a whitespace token"""
        if self._content_type is not _CONTENT_TYPE_SEPARATOR:
            return False
        return self._text.isspace()

    @property
    def text(self):
        # type: () -> str
        """The actual content of the token

        This field *must not* be used to determine the type of token. The
        formatter cannot reliably tell whether "#..." is a comment or a value
        (it can be both). Use is_value and is_comment instead to discriminate
        between token types.

        For value tokens, this is the concrete value to be emitted.

        For comment tokens, this is the full comment text.

        This is the same as str(token).
        """
        return self._text

    def __str__(self):
        # type: () -> str
        return self._text

    def __repr__(self):
        # type: () -> str
        # The tag values ("is_value", etc.) double as the keyword in the repr.
        cls_name = self.__class__.__name__
        return "{}({!r}, {}=True)".format(cls_name, self._text, self._content_type)


SPACE_SEPARATOR_FT = FormatterContentToken(" ", _CONTENT_TYPE_SEPARATOR)
COMMA_SEPARATOR_FT = FormatterContentToken(",", _CONTENT_TYPE_SEPARATOR)
def one_value_per_line_formatter(
    indentation,  # type: Union[int, Literal["FIELD_NAME_LENGTH"]]
    trailing_separator=True,  # type: bool
    immediate_empty_line=False,  # type: bool
):
    # type: (...) -> FormatterCallback
    """Provide a simple formatter that can handle indentation and trailing separators

    All formatters returned by this function put exactly one value per line.  This
    pattern is commonly seen in the "Depends" field and similar fields of
    debian/control files.

    :param indentation: Either the literal string "FIELD_NAME_LENGTH" or a positive
      integer, which determines the indentation for fields.  If it is an integer,
      then a fixed indentation is used (notably the value 1 ensures the shortest
      possible indentation).  Otherwise, if it is "FIELD_NAME_LENGTH", then the
      indentation is set such that it aligns the values based on the field name.
    :param trailing_separator: If True, then the last value will have a trailing
      separator token (e.g., ",") after it.
    :param immediate_empty_line: Whether the value should always start with an
      empty line.  If True, then the result becomes something like "Field:\\n value".
    :raises ValueError: If indentation is an integer less than 1.
    """
    if indentation != "FIELD_NAME_LENGTH" and indentation < 1:
        raise ValueError('indentation must be at least 1 (or "FIELD_NAME_LENGTH")')

    def _formatter(
        name,  # type: str
        sep_token,  # type: FormatterContentToken
        formatter_tokens,  # type: Iterator[FormatterContentToken]
    ):
        # type: (...) -> Iterator[Union[FormatterContentToken, str]]
        if indentation == "FIELD_NAME_LENGTH":
            # Field name plus the ":" and the space following it
            indent_len = len(name) + 2
        else:
            indent_len = indentation
        indent = " " * indent_len

        emitted_first_line = False
        tok_iter = BufferingIterator(formatter_tokens)
        is_value = operator.attrgetter("is_value")
        if immediate_empty_line:
            emitted_first_line = True
            yield "\n"
        for t in tok_iter:
            if t.is_comment:
                if not emitted_first_line:
                    yield "\n"
                yield t
            elif t.is_value:
                if not emitted_first_line:
                    yield " "
                else:
                    yield indent
                yield t
                # Non-whitespace separators are emitted after every value that
                # has another value after it (and after the last value as well
                # when a trailing separator was requested).
                if not sep_token.is_whitespace and (
                    trailing_separator or tok_iter.peek_find(is_value)
                ):
                    yield sep_token
                yield "\n"
            else:
                # Skip existing separators (etc.)
                continue
            emitted_first_line = True

    return _formatter


one_value_per_line_trailing_separator = one_value_per_line_formatter(
    "FIELD_NAME_LENGTH", trailing_separator=True
)


def format_field(
    formatter,  # type: FormatterCallback
    field_name,  # type: str
    separator_token,  # type: FormatterContentToken
    token_iter,  # type: Iterator[FormatterContentToken]
):
    # type: (...) -> str
    """Format a field using a provided formatter

    This function formats a series of tokens using the provided formatter.
    It can be used as a standalone formatter engine and can be used in test
    suites to validate third-party formatters (enabling them to test for
    corner cases without involving parsing logic).

    The formatter receives a series of FormatterContentTokens (via the
    token_iter) and is expected to yield one or more str or
    FormatterContentTokens.  The calling function will combine all of
    these into a single string, which will be used as the value.

    The formatter is recommended to yield the provided value and comment
    tokens interleaved with text segments of whitespace and separators
    as part of its output.  If it preserves comment and value tokens, the
    calling function can provide some runtime checks to catch bugs
    (like the formatter turning a comment into a value because it forgot
    to ensure that the comment was emitted directly after a newline
    character).

    When writing a formatter, please keep the following in mind:

     * The output of the formatter is appended directly after the ":" separator.
       Most formatters will want to emit either a space or a newline as the very
       first character for readability.
       (compare "Depends:foo\\n" to "Depends: foo\\n")

     * The formatter must always end its output on a newline.  This is a design
       choice of how the round-trip safe parser represents values that is imposed
       on the formatter.

     * It is often easier to discard/ignore all separator tokens from
       the provided token sequence and instead just yield separator tokens/str
       where the formatter wants to place them.

         - The formatter is strongly recommended to special-case formatting
           for whitespace separators (check for `separator_token.is_whitespace`).

           This is because space, tab and newline all count as valid separators
           and can all appear in the token sequence. If the original field uses
           a mix of these separators it is likely to completely undermine the
           desired result. Not to mention the additional complexity of handling
           when a separator token happens to use the newline character, which
           affects how the formatter is supposed to handle what comes after it
           (see the rules for comments, empty lines and continuation line
           markers).

     * The formatter must remember to emit a "continuation line" marker
       (typically a single space or tab) when emitting a value after
       a newline or a comment. A `yield " "` is sufficient.

         - The continuation line marker may be embedded inside a str
           with other whitespace (such as the newline coming before it
           or/and whitespace used for indentation purposes following
           the marker).

     * The formatter must not cause the output to contain completely
       empty/whitespace lines as these cause syntax errors.  The first
       line never counts as an empty line (as it will be appended after
       the field name).

     * Tokens must be discriminated via the `token.is_value` (etc.)
       properties. Assuming that `token.text.startswith("#")` implies a
       comment and similar stunts are wrong.  As an example, "#foo" is a
       perfectly valid value in some contexts.

     * Comment tokens *always* take up exactly one complete line including
       the newline character at the end of the line. They must be emitted
       directly after a newline character or another comment token.

     * Special cases that are rare but can happen:

       - Fields *can* start with comments and require a formatter provided newline.
         (Example: "Depends:\\n# Comment here\\n foo")

       - Fields *can* start on a separator or have two separators in a row.
         This is especially true for whitespace separated fields where every
         whitespace counts as a separator, but it can also happen with other
         separators (such as comma).

       - Value tokens can contain whitespace (for non-whitespace separators).
         When they do, the formatter must not attempt to change nor "normalize"
         the whitespace inside the value token as that might change how the
         value is interpreted.  (If you want to normalize such whitespace,
         the formatter is at the wrong abstraction level.  Instead, manipulate
         the values directly in the value interpretation layer)

    This function will provide *some* runtime checks of its input and the
    output from the formatter to detect some errors early and provide
    helpful diagnostics.  If you use the function for testing, you are
    recommended to rely on verifying the output of the function rather than
    relying on the runtime checks (as these are subject to change).

    :param formatter: A formatter (see FormatterCallback for the type).
      Basic formatting is provided via one_value_per_line_trailing_separator
      (a formatter) or one_value_per_line_formatter (a formatter generator).
    :param field_name: The name of the field.
    :param separator_token: One of SPACE_SEPARATOR_FT and COMMA_SEPARATOR_FT
    :param token_iter: An iterable of tokens to be formatted.
    :returns: The field name, the ":" and the formatted value as one string.
    :raises ValueError: If the input or the output of the formatter fails one
      of the runtime checks (including when the result does not end on a
      newline, which is notably the case when the formatter emits nothing).

    The following example shows how to define a formatter_callback along with
    a few verifications.

    >>> fmt_field_len_sep = one_value_per_line_trailing_separator
    >>> fmt_shortest = one_value_per_line_formatter(
    ...     1,
    ...     trailing_separator=False
    ... )
    >>> fmt_newline_first = one_value_per_line_formatter(
    ...     1,
    ...     trailing_separator=False,
    ...     immediate_empty_line=True
    ... )
    >>> # Omit separator tokens for in the token list for simplicity (the formatter does
    >>> # not use them, and it enables us to keep the example simple by reusing the list)
    >>> tokens = [
    ...     FormatterContentToken.value_token("foo"),
    ...     FormatterContentToken.comment_token("# some comment about bar\\n"),
    ...     FormatterContentToken.value_token("bar"),
    ... ]
    >>> # Starting with fmt_dl_ts
    >>> print(format_field(fmt_field_len_sep, "Depends", COMMA_SEPARATOR_FT, tokens), end='')
    Depends: foo,
    # some comment about bar
             bar,
    >>> print(format_field(fmt_field_len_sep, "Architecture", SPACE_SEPARATOR_FT, tokens), end='')
    Architecture: foo
    # some comment about bar
                  bar
    >>> # Control check for the special case where the field starts with a comment
    >>> print(format_field(fmt_field_len_sep, "Depends", COMMA_SEPARATOR_FT, tokens[1:]), end='')
    Depends:
    # some comment about bar
             bar,
    >>> # Also, check single line values (to ensure it ends on a newline)
    >>> print(format_field(fmt_field_len_sep, "Depends", COMMA_SEPARATOR_FT, tokens[2:]), end='')
    Depends: bar,
    >>> ### Changing format to the shortest length
    >>> print(format_field(fmt_shortest, "Depends", COMMA_SEPARATOR_FT, tokens), end='')
    Depends: foo,
    # some comment about bar
     bar
    >>> print(format_field(fmt_shortest, "Architecture", SPACE_SEPARATOR_FT, tokens), end='')
    Architecture: foo
    # some comment about bar
     bar
    >>> # Control check for the special case where the field starts with a comment
    >>> print(format_field(fmt_shortest, "Depends", COMMA_SEPARATOR_FT, tokens[1:]), end='')
    Depends:
    # some comment about bar
     bar
    >>> # Also, check single line values (to ensure it ends on a newline)
    >>> print(format_field(fmt_shortest, "Depends", COMMA_SEPARATOR_FT, tokens[2:]), end='')
    Depends: bar
    >>> ### Changing format to the newline first format
    >>> print(format_field(fmt_newline_first, "Depends", COMMA_SEPARATOR_FT, tokens), end='')
    Depends:
     foo,
    # some comment about bar
     bar
    >>> print(format_field(fmt_newline_first, "Architecture", SPACE_SEPARATOR_FT, tokens), end='')
    Architecture:
     foo
    # some comment about bar
     bar
    >>> # Control check for the special case where the field starts with a comment
    >>> print(format_field(fmt_newline_first, "Depends", COMMA_SEPARATOR_FT, tokens[1:]), end='')
    Depends:
    # some comment about bar
     bar
    >>> # Also, check single line values (to ensure it ends on a newline)
    >>> print(format_field(fmt_newline_first, "Depends", COMMA_SEPARATOR_FT, tokens[2:]), end='')
    Depends:
     bar
    """
    formatted_tokens = [field_name, ":"]
    just_after_newline = False
    last_was_value_token = False
    # The emptiness guard matters: an empty list has no last token to inspect
    # (and is rejected further down, since nothing ends the value on a newline).
    if isinstance(token_iter, list) and token_iter:
        # Stop people from using this to test known "invalid" cases.
        last_token = token_iter[-1]
        if last_token.is_comment:
            raise ValueError(
                "Invalid token_iter: Field values cannot end with comments"
            )
    for token in formatter(field_name, separator_token, token_iter):
        token_as_text = str(token)
        # If we are given formatter tokens, then use them to verify the output.
        if isinstance(token, FormatterContentToken):
            if token.is_comment:
                if not just_after_newline:
                    raise ValueError(
                        "Bad format: Comments must appear directly after a newline."
                    )
                # for the sake of ensuring people use proper test data.
                if not token_as_text.startswith("#"):
                    raise ValueError("Invalid Comment token: Must start with #")
                if not token_as_text.endswith("\n"):
                    raise ValueError("Invalid Comment token: Must end on a newline")
            elif token.is_value:
                if not token_as_text:
                    raise ValueError("Invalid Value token: It cannot be empty")
                if token_as_text[0].isspace() or token_as_text[-1].isspace():
                    raise ValueError(
                        "Invalid Value token: It cannot start nor end on whitespace"
                    )
                if just_after_newline:
                    raise ValueError("Bad format: Missing continuation line marker")
                if last_was_value_token:
                    raise ValueError("Bad format: Formatter omitted a separator")

            last_was_value_token = token.is_value
        else:
            last_was_value_token = False

        if not token_as_text:
            # Empty text contributes nothing to the output and does not move
            # us away from (or onto) the start of a line.
            continue

        if just_after_newline:
            if token_as_text[0] in ("\r", "\n"):
                raise ValueError("Bad format: Saw completely empty line.")
            if not token_as_text[0].isspace() and not token_as_text.startswith("#"):
                # Text at the start of a line that is neither whitespace nor
                # a comment lacks the continuation line marker.
                raise ValueError("Bad format: Missing continuation line marker")
        formatted_tokens.append(token_as_text)
        just_after_newline = token_as_text.endswith("\n")

    formatted_text = "".join(formatted_tokens)
    if not formatted_text.endswith("\n"):
        raise ValueError("Bad format: The field value must end on a newline")
    return formatted_text
_DATA_CLASS_OPTIONAL_ARGS = {}
if sys.version_info >= (3, 10):
    # The `slots` feature greatly reduces the memory usage by avoiding the `__dict__`
    # instance. But at the end of the day, performance is "nice to have" for this
    # feature and all current consumers are at Python 3.12 (except the CI tests...)
    _DATA_CLASS_OPTIONAL_ARGS["slots"] = True


@dataclasses.dataclass(frozen=True, **_DATA_CLASS_OPTIONAL_ARGS)
class Position:
    """Describes a "cursor" position inside a file

    A position is a 0-based line number paired with a cursor position on that
    line. This is modelled after the "Position" in Language Server Protocol
    (LSP).
    """

    line_position: int
    """Describes the line position as a 0-based line number

    See line_number if you want a human-readable line number
    """
    cursor_position: int
    """Describes a cursor position ("between two characters") or a character offset.

    When this value is 0, the position is at the start of a line. When it is 1, then
    the position is between the first and the second character (etc.).
    """

    @property
    def line_number(self) -> int:
        """The line number as a human would count it (1-based)"""
        return 1 + self.line_position

    def relative_to(self, new_base: "Position") -> "Position":
        """Offsets the position relative to another position

        This is useful to avoid the `position_in_file()` method: cache the
        position of the parent and, for each of its children, combine
        `position_in_parent()` with `relative_to()` to rebase the position.

            >>> parent: Locatable = ...                   # doctest: +SKIP
            >>> children: Iterable[Locatable] = ...       # doctest: +SKIP
            >>> # This will be expensive
            >>> parent_pos = parent.position_in_file(     # doctest: +SKIP
            ...     skip_leading_comments=False
            ... )
            >>> for child in children:                    # doctest: +SKIP
            ...    child_pos = child.position_in_parent()
            ...    # Avoid a position_in_file() for each child
            ...    child_pos_in_file = child_pos.relative_to(parent_pos)
            ...    ...  # Use the child_pos_in_file for something

        :param new_base: The position that should have been the origin rather than
          (0, 0).
        :returns: The position offset relative to the base position.
        """
        origin = (0, 0)
        if (self.line_position, self.cursor_position) == origin:
            return new_base
        if (new_base.line_position, new_base.cursor_position) == origin:
            return self
        if self.line_position:
            # On a later line than the base: only the line number shifts; the
            # cursor position is relative to the start of its own line.
            return Position(
                self.line_position + new_base.line_position,
                self.cursor_position,
            )
        # Same line as the base: the cursor offsets are cumulative.
        return Position(
            new_base.line_position,
            new_base.cursor_position + self.cursor_position,
        )


@dataclasses.dataclass(frozen=True, **_DATA_CLASS_OPTIONAL_ARGS)
class Range:
    """Describes a range inside a file

    This can be useful to describe things like "from line 4, cursor position 2
    to line 7, cursor position 10". When describing a full line including the
    newline, use line N, cursor position 0 to line N+1, cursor position 0.

    It is also used to denote the size of objects (in that case, the start position
    is set to START_POSITION as a convention if the precise location is not
    specified).

    This is modelled after the "Range" in Language Server Protocol (LSP).
    """

    start_pos: Position
    end_pos: Position

    @property
    def start_line_position(self) -> int:
        """Describes the start line position as a 0-based line number

        See start_line_number if you want a human-readable line number
        """
        return self.start_pos.line_position

    @property
    def start_cursor_position(self) -> int:
        """Describes the starting cursor position

        When this value is 0, the position is at the start of a line. When it is 1, then
        the position is between the first and the second character (etc.).
        """
        return self.start_pos.cursor_position

    @property
    def start_line_number(self) -> int:
        """The start line number as a human would count it"""
        return self.start_pos.line_number

    @property
    def end_line_position(self) -> int:
        """Describes the end line position as a 0-based line number

        See end_line_number if you want a human-readable line number
        """
        return self.end_pos.line_position

    @property
    def end_line_number(self) -> int:
        """The end line number as a human would count it"""
        return self.end_pos.line_number

    @property
    def end_cursor_position(self) -> int:
        """Describes the end cursor position

        When this value is 0, the position is at the start of a line. When it is 1, then
        the position is between the first and the second character (etc.).
        """
        return self.end_pos.cursor_position

    @property
    def line_count(self) -> int:
        """The number of lines (newlines) spanned by this range.

        Will be zero when the range fits inside one line.
        """
        return self.end_pos.line_position - self.start_pos.line_position

    @classmethod
    def between(cls, a: Position, b: Position) -> "Self":
        """Computes the range between two positions

        Unlike the constructor, this will always create a "positive" range.
        That is, the "earliest" position will always be the start position
        regardless of the order they were passed to `between`. When using
        the Range constructor, you have freedom to do "inverse" ranges
        in case that is ever useful
        """
        # sorted() is stable, so positions that compare equal keep their
        # argument order.
        earliest, latest = sorted(
            (a, b), key=lambda p: (p.line_position, p.cursor_position)
        )
        return cls(earliest, latest)

    def relative_to(self, new_base: Position) -> "Range":
        """Offsets the range relative to another position

        This is useful to avoid the `position_in_file()` method: cache the
        position of the parent and, for each of its children, combine
        `range_in_parent()` with `relative_to()` to rebase the range.

            >>> parent: Locatable = ...                   # doctest: +SKIP
            >>> children: Iterable[Locatable] = ...       # doctest: +SKIP
            >>> # This will be expensive
            >>> parent_pos = parent.position_in_file(     # doctest: +SKIP
            ...     skip_leading_comments=False
            ... )
            >>> for child in children:                    # doctest: +SKIP
            ...    child_range = child.range_in_parent()
            ...    # Avoid a position_in_file() for each child
            ...    child_range_in_file = child_range.relative_to(parent_pos)
            ...    ...  # Use the child_range_in_file for something

        :param new_base: The position that should have been the origin rather than
          (0, 0).
        :returns: The range offset relative to the base position.
        """
        if new_base == START_POSITION:
            return self
        rebased_start = self.start_pos.relative_to(new_base)
        rebased_end = self.end_pos.relative_to(new_base)
        return Range(rebased_start, rebased_end)

    def as_size(self) -> "Range":
        """Reduces the range to a "size"

        The returned range will always have its start position at (0, 0) and
        its end position shifted accordingly if it was not already based at
        (0, 0).

        The original range is not mutated and, if it is already at (0, 0), the
        method will just return it as-is.
        """
        if self.start_pos == START_POSITION:
            return self
        spanned_lines = self.line_count
        end_cursor = self.end_cursor_position
        if not spanned_lines:
            # Within a single line, the width is the cursor distance. Across
            # lines, the end cursor is already relative to its own line.
            end_cursor -= self.start_cursor_position
        return Range(START_POSITION, Position(spanned_lines, end_cursor))

    @classmethod
    def from_position_and_size(cls, base: Position, size: "Range") -> "Self":
        """Compute a range from a position and the size of another range

        This provides you with a range starting at the base position that has
        the same effective span as the size parameter.

        :param base: The desired starting position
        :param size: A range, which will be used as a size (that is, it will
          be reduced to a size via the `as_size()` method) for the resulting
          range
        :returns: A range at the provided base position that has the size of
          the provided range.
        """
        # The single size case is the multi size case with exactly one size.
        return cls.from_position_and_sizes(base, (size,))

    @classmethod
    def from_position_and_sizes(
        cls, base: Position, sizes: Iterable["Range"]
    ) -> "Self":
        """Compute a range from a position and the size of a number of ranges

        :param base: The desired starting position
        :param sizes: All the ranges that combined make up the size of the
          desired range. Note that order can affect the end result. Particularly
          the end character offset gets reset every time a size spans a line.
        :returns: A range at the provided base position that has the combined
          size of the provided ranges.
        """
        end_line = base.line_position
        end_cursor = base.cursor_position
        for size in sizes:
            rebased = size.as_size()
            spanned_lines = rebased.line_count
            if spanned_lines:
                end_line += spanned_lines
                end_cursor = rebased.end_cursor_position
                continue
            end_cursor += rebased.end_cursor_position - rebased.start_cursor_position
        return cls(base, Position(end_line, end_cursor))


START_POSITION = Position(0, 0)
SECOND_CHAR_POS = Position(0, 1)
SECOND_LINE_POS = Position(1, 0)
ONE_CHAR_RANGE = Range.between(START_POSITION, SECOND_CHAR_POS)
ONE_LINE_RANGE = Range.between(START_POSITION, SECOND_LINE_POS)
class Locatable:
    """Base for tokens/elements that can report their position and size

    Subclasses are expected to provide `parent_element` and `size`; both raise
    NotImplementedError here. The position/range methods are derived from
    those two.
    """

    __slots__ = ()

    @property
    def parent_element(self):
        # type: () -> Optional[Deb822Element]
        raise NotImplementedError

    def position_in_parent(self, *, skip_leading_comments: bool = True) -> Position:
        """The start position of this token/element inside its parent

        This operation is generally linear to the number of "parts"
        (elements/tokens) inside the parent.

        :param skip_leading_comments: If True, then any leading comment that
          can be skipped will be excluded in the position of this locatable.
          This is useful if you want the position of the "semantic" content of
          a field without also highlighting a leading comment. Remember to
          align this parameter with the `size` call, so the range does not
          "overshoot" into the next element (or fall short and only cover
          part of an element). Note that this option can only be used to
          filter out leading comments when the comments are a subset of the
          element. It has no effect on elements that are entirely made of
          comments.
        :raises TypeError: If the object has no parent (it is detached).
        """
        # pylint: disable=unused-argument
        # Note: The base class makes no assumptions about what tokens can be skipped,
        # therefore, skip_leading_comments is unused here. However, I do not want the
        # API to differ between elements and tokens.

        container = self.parent_element
        if container is None:
            raise TypeError(
                "Cannot determine the position since the object is detached"
            )

        def _sizes_of_preceding_parts():
            # Everything in the parent that comes before this object
            for part in container.iter_parts():
                if part is self:
                    return
                yield part.size(skip_leading_comments=False)

        # The end of the combined preceding parts is where this object starts.
        preceding = Range.from_position_and_sizes(
            START_POSITION,
            _sizes_of_preceding_parts(),
        )
        return preceding.end_pos

    def range_in_parent(self, *, skip_leading_comments: bool = True) -> Range:
        """The range of this token/element inside its parent

        This operation is generally linear to the number of "parts"
        (elements/tokens) inside the parent.

        :param skip_leading_comments: Passed on to both `position_in_parent`
          and `size`. If True, then any leading comment that can be skipped
          will be excluded. Note that this option can only be used to filter
          out leading comments when the comments are a subset of the element.
          It has no effect on elements that are entirely made of comments.
        """
        start = self.position_in_parent(skip_leading_comments=skip_leading_comments)
        extent = self.size(skip_leading_comments=skip_leading_comments)
        return Range.from_position_and_size(start, extent)

    def position_in_file(self, *, skip_leading_comments: bool = True) -> Position:
        """The start position of this token/element in this file

        This is an *expensive* operation and in many cases has to traverse
        the entire file structure to answer the query.  Consider whether
        you can maintain the parent's position and then use
        `position_in_parent()` combined with
        `child_position.relative_to(parent_position)`

        :param skip_leading_comments: If True, then any leading comment that
          can be skipped will be excluded in the position of this locatable.
          This is useful if you want the position of the "semantic" content of
          a field without also highlighting a leading comment. Remember to
          align this parameter with the `size` call, so the range does not
          "overshoot" into the next element (or fall short and only cover
          part of an element). Note that this option can only be used to
          filter out leading comments when the comments are a subset of the
          element. It has no effect on elements that are entirely made of
          comments.
        """
        local_position = self.position_in_parent(
            skip_leading_comments=skip_leading_comments,
        )
        container = self.parent_element
        if container is None:
            return local_position
        # The parent is always located including its leading comments, since
        # the position inside the parent was counted from its very start.
        container_position = container.position_in_file(skip_leading_comments=False)
        return local_position.relative_to(container_position)

    def size(self, *, skip_leading_comments: bool = True) -> Range:
        """Describe the object's size as a continuous range

        :param skip_leading_comments: If True, then any leading comment that
          can be skipped will be excluded in the size of this locatable.
          This is useful if you want the "semantic" content of a field
          without also highlighting a leading comment. Remember to align this
          parameter with the `position_in_file` or `position_in_parent` call,
          so the range does not "overshoot" into the next element (or fall
          short and only cover part of an element). Note that this option can
          only be used to filter out leading comments when the comments are a
          subset of the element. It has no effect on elements that are entirely
          made of comments.
        """
        raise NotImplementedError
class ValueReference(Generic[TE]):
    """Handle to a single value stored in a linked list of tokens/elements

    The handle makes it possible to read, replace or remove one value
    "in-place", e.g. to conditionally drop a value after inspecting it.

    A ValueReference can be invalidated by changes or actions performed on
    the provider of the reference. As an example, sorting a list of values
    will generally invalidate all ValueReferences related to that list.

    Validity problems are reported when they are detected, but most of the
    time they will go unnoticed. To help with detection, only a *weak*
    reference to the underlying node is kept, which makes it possible to
    notice when the node has gone away. Keep in mind that the timeliness of
    garbage collection is implementation defined (e.g., pypy does not use
    ref-counting).
    """

    __slots__ = (
        "_node",
        "_render",
        "_value_factory",
        "_removal_handler",
        "_mutation_notifier",
    )

    def __init__(
        self,
        node,  # type: LinkedListNode[TE]
        render,  # type: Callable[[TE], str]
        value_factory,  # type: Callable[[str], TE]
        removal_handler,  # type: Callable[[LinkedListNode[TokenOrElement]], None]
        mutation_notifier,  # type: Optional[Callable[[], None]]
    ):
        # The attribute is set to None (rather than a dead weakref) once
        # remove() has been called; see _resolve_node().
        weak_node = weakref.ref(node)
        self._node = weak_node  # type: Optional[ReferenceType[LinkedListNode[TE]]]
        self._render = render
        self._value_factory = value_factory
        self._removal_handler = removal_handler
        self._mutation_notifier = mutation_notifier

    def _resolve_node(self):
        # type: () -> LinkedListNode[TE]
        """Return the referenced node or raise RuntimeError if it is gone"""
        weak_node = self._node
        # NB: "the ref itself is None" and "the ref resolves to None" are
        # distinct states: the former is a known removal via remove(), the
        # latter means the node was garbage collected behind our back.
        if weak_node is None:
            raise RuntimeError("Cannot use ValueReference after remove()")
        target = weak_node()
        if target is None:
            raise RuntimeError("ValueReference is invalid (garbage collected)")
        return target

    @property
    def value(self):
        # type: () -> str
        """Resolve the reference into a str"""
        current = self._resolve_node().value
        return self._render(current)

    @value.setter
    def value(self, new_value):
        # type: (str) -> None
        """Update the reference value

        Updating the value via this method will *not* invalidate the
        reference (or other references to the same container).

        The value factory may raise an exception if the new value does not
        follow the requirements for the referenced values. As an example,
        values in whitespace separated lists cannot contain spaces and would
        trigger an exception.
        """
        # Build the replacement before resolving the node, so a rejected
        # value is reported before any reference-validity problem.
        replacement = self._value_factory(new_value)
        self._resolve_node().value = replacement
        notify = self._mutation_notifier
        if notify is not None:
            notify()

    @property
    def locatable(self):
        # type: () -> Locatable
        """Reference to a locatable that can be used to determine where this value is"""
        node = self._resolve_node()
        return node.value

    def remove(self):
        # type: () -> None
        """Remove the underlying value

        This will invalidate the ValueReference (and any other
        ValueReferences pointing to that exact value). The validity of other
        ValueReferences to that container remains unaffected.
        """
        doomed = cast("LinkedListNode[TokenOrElement]", self._resolve_node())
        self._removal_handler(doomed)
        # Mark as "known removal" so later use gives the precise error.
        self._node = None
-> None + self._kvpair_element = kvpair_element + self._proxy_element = interpreted_value_element + self._token_list = LinkedList(interpreted_value_element.parts) + self._vtype = vtype + self._stype = stype + self._str2value_parser = str2value_parser + self._default_separator_factory = default_separator_factory + self._value_factory = _parser_to_value_factory(str2value_parser, vtype) + self._render = render + self._format_preserve_original_formatting = True + self._formatter = ( + one_value_per_line_trailing_separator + ) # type: FormatterCallback + self._changed = False + self.__continuation_line_char = None # type: Optional[str] + assert self._token_list + last_token = self._token_list.tail + + if last_token is not None and isinstance( + last_token, Deb822NewlineAfterValueToken + ): + # We always remove the last newline (if present), because then + # adding values will happen after the last value rather than on + # a new line by default. + # + # On write, we always ensure the value ends on a newline (even + # if it did not before). This is simpler and should be a + # non-issue in practise. + self._token_list.pop() + + def __iter__(self): + # type: () -> Iterator[str] + yield from (self._render(v) for v in self.value_parts) + + def __bool__(self): + # type: () -> bool + return next(iter(self), None) is not None + + def __exit__( + self, + exc_type, # type: Optional[Type[BaseException]] + exc_val, # type: Optional[BaseException] + exc_tb, # type: Optional[TracebackType] + ): + # type: (...) 
def append_separator(self, space_after_separator=True):
    # type: (bool) -> None
    """Append the default separator token to the end of the list

    The list is flagged as changed. If needed, a continuation line token is
    inserted before the separator.

    :param space_after_separator: If True, a single space is appended after
      the separator. This is ignored when the separator itself is
      whitespace, since a second space would be redundant.
    """
    separator = self._default_separator_factory()
    separator_is_space = separator.is_whitespace
    wants_padding = space_after_separator and not separator_is_space

    self._changed = True
    self._append_continuation_line_token_if_necessary()

    tokens = self._token_list
    tokens.append(separator)
    if wants_padding:
        tokens.append(Deb822WhitespaceToken(" "))
def _remove_node(self, node_to_remove):
    # type: (LinkedListNode[TokenOrElement]) -> None
    """Unlink a value node together with its neighbouring separator/space tokens

    The list is always flagged as changed. If the node holds the only value
    in the list, the entire token list is cleared. Otherwise everything
    between the node and the nearest remaining value on one side is cut
    out; which side is chosen depends on where comments are located (see
    the comments in the body).

    :param node_to_remove: The node holding the value to remove.
    """
    vtype = self._vtype
    self._changed = True

    # We naively want to remove the node and everything to the left of it
    # until the previous value. That is the basic idea for now (ignoring
    # special-cases for now).
    #
    # Example:
    #
    # """
    # Multiline-Keywords: bar[
    # # Comment about foo
    #                     foo]
    #                     baz
    # Keywords: bar[ foo] baz
    # Comma-List: bar[, foo], baz,
    # Multiline-Comma-List: bar[,
    # # Comment about foo
    #                       foo],
    #                       baz,
    # """
    #
    # Assuming we want to remove "foo" for the lists, the []-markers
    # show what we aim to remove. This has the nice side-effect of
    # preserving whether or not the value has a trailing separator.
    # Note that we do *not* attempt to repair missing separators but
    # it may fix duplicated separators by "accident".
    #
    # Now, there are three special cases to be aware of, where this
    # approach has shortcomings:
    #
    #  1) If foo is the only value (in which case, "delete everything"
    #     is the only option).
    #  2) If foo is the first value
    #  3) If foo is not the only value on the line and we see a comment
    #     inside the deletion range.
    #
    # For 2) + 3), we attempt to flip the range and delete the value and
    # everything after it (up to but excluding "baz") instead. This
    # definitely fixes 3), but 2) has yet another corner case, namely:
    #
    # """
    # Multiline-Comma-List: foo,
    # # Remark about bar
    #                       bar,
    # Another-Case: foo
    # # Remark, also we use leading separator
    #               , bar
    # """
    #
    # The options include:
    #
    #  A) Discard the comment - brain-dead simple
    #  B) Hoist the comment up to a field comment, but then what if the
    #     field already has a comment?
    #  C) Clear the first value line leaving just the newline and
    #     replace the separator before "bar" (if present) with a space.
    #     (leaving you with the value of the form "\n# ...\n      bar")
    #

    first_value_on_lhs = None  # type: Optional[LinkedListNode[TokenOrElement]]
    first_value_on_rhs = None  # type: Optional[LinkedListNode[TokenOrElement]]
    comment_before_previous_value = False
    comment_before_next_value = False
    # Scan left for the nearest value, noting whether a comment sits between
    # it and the node being removed.
    for past_node in node_to_remove.iter_previous(skip_current=True):
        past_token = past_node.value
        if isinstance(past_token, Deb822Token) and past_token.is_comment:
            comment_before_previous_value = True
            continue
        if isinstance(past_token, vtype):
            first_value_on_lhs = past_node
            break

    # Same scan, but to the right.
    for future_node in node_to_remove.iter_next(skip_current=True):
        future_token = future_node.value
        if isinstance(future_token, Deb822Token) and future_token.is_comment:
            comment_before_next_value = True
            continue
        if isinstance(future_token, vtype):
            first_value_on_rhs = future_node
            break

    if first_value_on_rhs is None and first_value_on_lhs is None:
        # This was the last value, just remove everything.
        self._token_list.clear()
        return

    if first_value_on_lhs is not None and not comment_before_previous_value:
        # Delete left
        delete_lhs_of_node = True
    elif first_value_on_rhs is not None and not comment_before_next_value:
        # Delete right
        delete_lhs_of_node = False
    else:
        # There is a comment on either side (or no value on one side and a
        # comment on the other). Keep it simple, we just delete to
        # one side (preferring deleting to left if possible).
        delete_lhs_of_node = first_value_on_lhs is not None

    # Determine the two nodes that survive on each side of the cut; either
    # may be None when the cut reaches the start/end of the list.
    if delete_lhs_of_node:
        first_remain_lhs = first_value_on_lhs
        first_remain_rhs = node_to_remove.next_node
    else:
        first_remain_lhs = node_to_remove.previous_node
        first_remain_rhs = first_value_on_rhs

    # Actual deletion - with some manual labour to update HEAD/TAIL of
    # the list in case we do a "delete everything left/right this node".
    if first_remain_lhs is None:
        self._token_list.head_node = first_remain_rhs
    if first_remain_rhs is None:
        self._token_list.tail_node = first_remain_lhs
    # NOTE(review): presumably link_nodes() tolerates None on either side
    # (both cases can occur here) - confirm against LinkedListNode.
    LinkedListNode.link_nodes(first_remain_lhs, first_remain_rhs)
def append_comment(self, comment_text):
    # type: (str) -> None
    """Append a comment after the current content of the list

    Comments must start on their own line, so a newline token is appended
    first unless the list already ends on a newline (an empty list does not
    count as ending on a newline).

    :param comment_text: The text of the comment; it is passed through
      `_format_comment` before being wrapped in a comment token.
    """
    if not self._previous_is_newline():
        self.append_newline()
    formatted = _format_comment(comment_text)
    self._token_list.append(Deb822CommentToken(formatted))
def clear(self):
    # type: () -> None
    """Like list.clear() - removes all content (including comments and spaces)

    The list is only flagged as changed when there was something to remove.
    """
    tokens = self._token_list
    had_content = bool(tokens)
    if had_content:
        self._changed = True
    tokens.clear()
def sort_elements(
    self,
    *,
    key=None,  # type: Optional[Callable[[VE], Any]]
    reverse=False,  # type: bool
):
    # type: (...) -> None
    """Sort the elements (abstract values) in this list.

    This method will sort the logical values of the list. It will
    attempt to preserve comments associated with a given value where
    possible. Whether space and separators are preserved depends on
    the contents of the field as well as the formatting settings.

    Sorting (without reformatting) is likely to leave you with "awkward"
    whitespace. Therefore, you almost always want to apply reformatting
    such as the reformat_when_finished() method.

    Sorting will invalidate all ValueReferences.

    :param key: Optional function mapping a value element to its sort key.
      When omitted (or falsy), values are ordered by their text
      (`convert_to_text()`).
    :param reverse: If True, sort in descending order.
    """
    # First node of the run of comments preceding the next value (if any)
    comment_start_node = None
    vtype = self._vtype
    stype = self._stype

    def key_func(x):
        # type: (Tuple[VE, List[TokenOrElement]]) -> Any
        # `x` is a (value, leading tokens) pair; only the value is sorted on.
        if key:
            return key(x[0])
        return x[0].convert_to_text()

    # Pairs of (value, tokens from the first preceding comment up to the value)
    parts = []

    for node in self._token_list.iter_nodes():
        value = node.value
        if isinstance(value, Deb822Token) and value.is_comment:
            if comment_start_node is None:
                comment_start_node = node
            continue

        if isinstance(value, vtype):
            comments = []
            if comment_start_node is not None:
                # Keep every token (not only the comments) between the
                # first comment and this value, so they travel with it.
                for keep_node in comment_start_node.iter_next(skip_current=False):
                    if keep_node is node:
                        break
                    comments.append(keep_node.value)
            parts.append((value, comments))
            comment_start_node = None

    parts.sort(key=key_func, reverse=reverse)

    # Rebuild the token list from scratch in the sorted order.
    self._changed = True
    self._token_list.clear()
    first_value = True

    separator_is_space = self._default_separator_factory().is_whitespace

    for value, comments in parts:
        if first_value:
            first_value = False
            if comments:
                # While unlikely, there could be a separator between the comments.
                # It would be in the way and we remove it.
                comments = [x for x in comments if not isinstance(x, stype)]
                # Comments cannot start the field, so inject a newline to
                # work around that
                self.append_newline()
        else:
            if not separator_is_space and not any(
                isinstance(x, stype) for x in comments
            ):
                # While unlikely, you can hide a comma between two comments and expect
                # us to preserve it. However, the more common case is that the separator
                # appeared before the comments and was thus omitted (leaving us to re-add
                # it here).
                self.append_separator(space_after_separator=False)
            if comments:
                self.append_newline()
            else:
                self._token_list.append(Deb822WhitespaceToken(" "))

        self._token_list.extend(comments)
        self.append_value(value)
def _parser_to_value_factory(
    parser,  # type: StrToValueParser[VE]
    vtype,  # type: Type[VE]
):
    # type: (...) -> Callable[[str], VE]
    """Wrap a string parser into a factory producing exactly one value

    :param parser: Callable turning a str into an iterable of tokens and
      value elements.
    :param vtype: The type the single parsed element must be an instance of.
    :return: A callable mapping a str to a `vtype` instance. It raises
      ValueError if the input is empty, parses into more than one element
      or parses into something that is not a `vtype` (e.g., whitespace or
      a comment).
    """

    def _value_factory(v):
        # type: (str) -> VE
        if v == "":
            raise ValueError("The empty string is not a value")
        token_iter = iter(parser(v))
        # Only the first two elements are needed to tell "exactly one" from
        # "none" and "more than one".
        t1 = next(token_iter, None)  # type: Optional[Union[TokenOrElement]]
        t2 = next(token_iter, None)
        # An empty result for a non-empty input is a bug in the parser, not
        # in the input; hence an assertion rather than a ValueError.
        assert t1 is not None, (
            'Bad parser - it returned None (or no TE) for "' + v + '"'
        )
        if t2 is not None:
            msg = textwrap.dedent(
                """\
                The input "{v}" should have been exactly one element, but the parser provided at
                least two. This can happen with unnecessary leading/trailing whitespace
                or including commas the value for a comma list.
                """
            ).format(v=v)
            raise ValueError(msg)
        if not isinstance(t1, vtype):
            if isinstance(t1, Deb822Token) and (t1.is_comment or t1.is_whitespace):
                # The template must be formatted; otherwise the user is
                # shown a literal "{v}" instead of the offending input.
                raise ValueError(
                    'The input "{v}" is whitespace or a comment: Expected a value'.format(
                        v=v
                    )
                )
            msg = (
                'The input "{v}" should have produced a element of type {vtype_name}, but'
                " instead it produced {t1}"
            )
            raise ValueError(msg.format(v=v, vtype_name=vtype.__name__, t1=t1))

        t1_len = len(t1.convert_to_text())
        assert t1_len == len(v), (
            "Bad tokenizer - the token did not cover the input text"
            " exactly ({t1_len} != {v_len})".format(t1_len=t1_len, v_len=len(v))
        )
        return t1

    return _value_factory
def _is_comma_token(v):
    # type: (TokenOrElement) -> bool
    """Return True if `v` is a comma token (`Deb822CommaToken`)

    This is a pure predicate; it does not consume anything itself. The
    list value parsers in this module pass it to `peek_find()` on the
    buffered token iterator to locate the next comma.
    """
    return isinstance(v, Deb822CommaToken)
Verify that this is a terminating + # comma (comma may appear in the name or email) + # + # We include value_parts[-1] to easily cope with the common case of + # "foo <a@b.com>," where we will have 0 peeked element to examine. + peeked_elements = [value_parts[-1]] + peeked_elements.extend(buffered_iterator.peek_many(comma_offset - 1)) + comma_was_separator = False + i = len(peeked_elements) - 1 + while i >= 0: + token = peeked_elements[i] + if isinstance(token, Deb822ValueToken): + if token.text.endswith(">"): + # The comma terminates the value + value_parts.extend(buffered_iterator.consume_many(i)) + assert isinstance( + value_parts[-1], Deb822ValueToken + ) and value_parts[-1].text.endswith(">"), "Got: " + str( + value_parts + ) + comma_was_separator = True + break + i -= 1 + if comma_was_separator: + break + value_parts.extend(buffered_iterator.consume_many(comma_offset)) + assert isinstance(value_parts[-1], Deb822CommaToken) + else: + # The value is the last value there is. Consume all remaining tokens + # and then trim from the right. 
class Deb822Element(Locatable):
    """Composite elements (consists of 1 or more tokens)

    Subclasses provide the direct children via `iter_parts()`; the
    traversal, text conversion and size helpers here are built on top of it.
    The parent is only weakly referenced.
    """

    __slots__ = ("_parent_element", "_full_size_cache", "__weakref__")

    def __init__(self):
        # type: () -> None
        # Weak reference to the parent element; None while unattached.
        self._parent_element = None  # type: Optional[ReferenceType['Deb822Element']]
        # Lazily computed result of size().
        self._full_size_cache = None  # type: Optional[Range]

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        """Iterate over the direct children (must be provided by subclasses)"""
        raise NotImplementedError  # pragma: no cover

    def iter_parts_of_type(self, only_element_or_token_type):
        # type: (Type[TE]) -> Iterable[TE]
        """Iterate over the direct children that are instances of the given type"""
        yield from (
            child
            for child in self.iter_parts()
            if isinstance(child, only_element_or_token_type)
        )

    def iter_tokens(self):
        # type: () -> Iterable[Deb822Token]
        """Iterate (depth-first) over all tokens contained in this element"""
        for child in self.iter_parts():
            # Control check to catch bugs early
            assert child._parent_element is not None
            if not isinstance(child, Deb822Element):
                yield child
                continue
            yield from child.iter_tokens()

    def iter_recurse(
        self, *, only_element_or_token_type=None  # type: Optional[Type[TE]]
    ):
        # type: (...) -> Iterable[TE]
        """Iterate (depth-first, parents first) over all nested tokens and elements

        :param only_element_or_token_type: If given, only instances of this
          type are yielded. Nested elements are still descended into even
          when they are not yielded themselves.
        """
        wanted = only_element_or_token_type
        for child in self.iter_parts():
            if wanted is None or isinstance(child, wanted):
                yield cast("TE", child)
            if isinstance(child, Deb822Element):
                yield from child.iter_recurse(only_element_or_token_type=wanted)

    @property
    def is_error(self):
        # type: () -> bool
        return False

    @property
    def is_comment(self):
        # type: () -> bool
        return False

    @property
    def parent_element(self):
        # type: () -> Optional[Deb822Element]
        """The parent element as resolved (via `resolve_ref`) from the weak reference"""
        return resolve_ref(self._parent_element)

    @parent_element.setter
    def parent_element(self, new_parent):
        # type: (Optional[Deb822Element]) -> None
        if new_parent is None:
            self._parent_element = None
        else:
            self._parent_element = weakref.ref(new_parent)

    def _init_parent_of_parts(self):
        # type: () -> None
        """Register this element as the parent of each of its direct children"""
        for child in self.iter_parts():
            child.parent_element = self

    # Deliberately not a "text" property, to signal that it is not necessary cheap.
    def convert_to_text(self):
        # type: () -> str
        """Concatenate the text of every token contained in this element"""
        pieces = [token.text for token in self.iter_tokens()]
        return "".join(pieces)

    def clear_parent_if_parent(self, parent):
        # type: (Deb822Element) -> None
        """Detach from `parent`, but only if it is the current parent"""
        if self.parent_element is parent:
            self._parent_element = None

    def size(self, *, skip_leading_comments: bool = True) -> Range:
        """Return the (cached) size of this element including all of its parts

        The `skip_leading_comments` parameter is not used here; the parts
        are always measured with their leading comments included.
        """
        cached = self._full_size_cache
        if cached is not None:
            return cached
        part_sizes = (p.size(skip_leading_comments=False) for p in self.iter_parts())
        cached = Range.from_position_and_sizes(START_POSITION, part_sizes)
        self._full_size_cache = cached
        return cached
class Deb822ErrorElement(Deb822Element):
    """Element wrapping tokens or elements that are out of place

    Usually the content is simply Deb822ErrorToken instances, but that is not
    a requirement.  For example, a parser that finds otherwise valid
    elements/tokens in the wrong order can bundle them in a
    Deb822ErrorElement to flag the sequence as invalid.
    """

    __slots__ = ("_parts",)

    def __init__(self, parts):
        # type: (Sequence[TokenOrElement]) -> None
        super().__init__()
        # Snapshot the parts so later changes to the caller's sequence do not
        # affect this element.
        self._parts = tuple(parts)
        self._init_parent_of_parts()

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        for part in self._parts:
            yield part

    @property
    def is_error(self):
        # type: () -> bool
        return True
-> None + super().__init__() + if comment_element is not None and continuation_line_token is None: + raise ValueError("Only continuation lines can have comments") + self._comment_element = comment_element # type: Optional[Deb822CommentElement] + self._continuation_line_token = continuation_line_token + self._leading_whitespace_token = ( + leading_whitespace_token + ) # type: Optional[Deb822WhitespaceToken] + self._value_tokens = value_parts # type: List[TokenOrElement] + self._trailing_whitespace_token = trailing_whitespace_token + self._newline_token = newline_token # type: Optional[Deb822WhitespaceToken] + self._init_parent_of_parts() + + @property + def comment_element(self): + # type: () -> Optional[Deb822CommentElement] + return self._comment_element + + @property + def continuation_line_token(self): + # type: () -> Optional[Deb822ValueContinuationToken] + return self._continuation_line_token + + @property + def newline_token(self): + # type: () -> Optional[Deb822WhitespaceToken] + return self._newline_token + + def add_newline_if_missing(self): + # type: () -> bool + if self._newline_token is None: + self._newline_token = Deb822NewlineAfterValueToken() + self._newline_token.parent_element = self + self._full_size_cache = None + return True + return False + + def _iter_content_parts(self): + # type: () -> Iterable[TokenOrElement] + if self._leading_whitespace_token: + yield self._leading_whitespace_token + yield from self._value_tokens + if self._trailing_whitespace_token: + yield self._trailing_whitespace_token + + def _iter_content_tokens(self): + # type: () -> Iterable[Deb822Token] + for part in self._iter_content_parts(): + if isinstance(part, Deb822Element): + yield from part.iter_tokens() + else: + yield part + + def convert_content_to_text(self): + # type: () -> str + if ( + len(self._value_tokens) == 1 + and not self._leading_whitespace_token + and not self._trailing_whitespace_token + and isinstance(self._value_tokens[0], Deb822Token) + ): + # By 
def position_in_parent(self, *, skip_leading_comments: bool = True) -> Position:
    """Return the start position of this value line inside its parent

    :param skip_leading_comments: When True, the returned position is that of
      the first part of the line that is not a comment rather than that of
      the line's leading comment.
    :returns: The position relative to the parent element.
    """
    base_pos = super().position_in_parent(skip_leading_comments=False)
    if skip_leading_comments:
        for p in self.iter_parts():
            if p.is_comment:
                continue
            non_comment_pos = p.position_in_parent(skip_leading_comments=False)
            base_pos = non_comment_pos.relative_to(base_pos)
            # Only the first non-comment part defines where the line starts.
            # Without this break, the offsets of all following parts were
            # folded into the result as well.
            break
    return base_pos
class Deb822ParsedValueElement(Deb822Element):
    """Element holding the tokens of one parsed value

    The token list must start and end with a Deb822ValueToken.  Text
    conversions are cached; the variant with comments and the variant without
    comments each have their own cache slot.
    """

    __slots__ = ("_text_cached", "_text_no_comments_cached", "_token_list")

    def __init__(self, tokens):
        # type: (List[Deb822Token]) -> None
        """
        :param tokens: The tokens making up the value.
        :raises ValueError: If the first or the last token is not a
          Deb822ValueToken.
        """
        super().__init__()
        self._token_list = tokens
        self._init_parent_of_parts()
        if not isinstance(tokens[0], Deb822ValueToken) or not isinstance(
            tokens[-1], Deb822ValueToken
        ):
            raise ValueError(
                self.__class__.__name__ + " MUST start and end on a Deb822ValueToken"
            )
        if len(tokens) == 1:
            # A lone value token cannot contain a comment, so both text
            # variants are known up front.
            token = tokens[0]
            self._text_cached = token.text  # type: Optional[str]
            self._text_no_comments_cached = token.text  # type: Optional[str]
        else:
            self._text_cached = None
            self._text_no_comments_cached = None

    def convert_to_text(self):
        # type: () -> str
        """Return the full text of the value (comments included)"""
        # Must use its own cache slot: sharing the "no comments" slot made the
        # result of whichever conversion ran first leak into the other one.
        if self._text_cached is None:
            self._text_cached = super().convert_to_text()
        return self._text_cached

    def convert_to_text_without_comments(self):
        # type: () -> str
        """Return the text of the value with all comment tokens left out"""
        if self._text_no_comments_cached is None:
            self._text_no_comments_cached = "".join(
                t.text for t in self.iter_tokens() if not t.is_comment
            )
        return self._text_no_comments_cached

    def iter_parts(self):
        # type: () -> Iterable[TokenOrElement]
        yield from self._token_list
len(self._comment_tokens) + + def __getitem__(self, item): + # type: (int) -> Deb822CommentToken + return self._comment_tokens[item] + + def iter_parts(self): + # type: () -> Iterable[TokenOrElement] + yield from self._comment_tokens + + +class Deb822KeyValuePairElement(Deb822Element): + __slots__ = ( + "_comment_element", + "_field_token", + "_separator_token", + "_value_element", + ) + + def __init__( + self, + comment_element, # type: Optional[Deb822CommentElement] + field_token, # type: Deb822FieldNameToken + separator_token, # type: Deb822FieldSeparatorToken + value_element, # type: Deb822ValueElement + ): + # type: (...) -> None + super().__init__() + self._comment_element = comment_element # type: Optional[Deb822CommentElement] + self._field_token = field_token # type: Deb822FieldNameToken + self._separator_token = separator_token # type: Deb822FieldSeparatorToken + self._value_element = value_element # type: Deb822ValueElement + self._init_parent_of_parts() + + @property + def field_name(self): + # type: () -> _strI + return self.field_token.text + + @property + def field_token(self): + # type: () -> Deb822FieldNameToken + return self._field_token + + @property + def value_element(self): + # type: () -> Deb822ValueElement + return self._value_element + + @value_element.setter + def value_element(self, new_value): + # type: (Deb822ValueElement) -> None + self._full_size_cache = None + self._value_element.clear_parent_if_parent(self) + self._value_element = new_value + new_value.parent_element = self + + def interpret_as( + self, + interpreter, # type: Interpretation[T] + discard_comments_on_read=True, # type: bool + ): + # type: (...) 
def _format_comment(c):
    # type: (str) -> str
    """Normalize one line of text into a comment line

    The result always starts with "#" and always ends with exactly the
    newline that terminates the line.

    :param c: The comment text, with or without the leading "#" and with or
      without the trailing newline.
    :returns: The comment line.  Empty and whitespace-only input is mapped to
      an empty comment line ("#\\n").
    :raises ValueError: If the text has a newline anywhere but at the end.
    """
    if c == "":
        # Special-case: Empty strings are mapped to an empty comment line
        return "#\n"
    if "\n" in c[:-1]:
        raise ValueError("Comment lines must not have embedded newlines")
    if not c.endswith("\n"):
        c = c.rstrip() + "\n"
    if not c.startswith("#"):
        text = c.lstrip()
        # For whitespace-only input, lstrip() also removes the terminating
        # newline; emit an empty comment line rather than "# " without a
        # newline.
        c = "# " + text if text else "#\n"
    return c
def _convert_value_lines_to_lines(
    value_lines,  # type: Iterable[Deb822ValueLineElement]
    strip_comments,  # type: bool
):
    # type: (...) -> Iterable[str]
    """Lazily yield the text of each value line

    :param value_lines: The value lines to convert.
    :param strip_comments: When true, the text of comment tokens is left out
      of each yielded line; otherwise each line is converted as-is.
    """
    for value_line in value_lines:
        if strip_comments:
            yield "".join(
                token.text
                for token in value_line.iter_tokens()
                if not token.is_comment
            )
        else:
            yield value_line.convert_to_text()
class AutoResolvingMixin(Generic[T], _ParagraphMapping_Base[T]):
    """Mapping protocol implemented on top of a paragraph's kvpair API

    Sub-classes provide `_paragraph` (the paragraph being viewed) and
    `_interpret_value` (how a key/value pair element becomes a value).
    """

    @property
    def _auto_resolve_ambiguous_fields(self):
        # type: () -> bool
        return True

    @property
    def _paragraph(self):
        # type: () -> Deb822ParagraphElement
        raise NotImplementedError  # pragma: no cover

    def __len__(self):
        # type: () -> int
        return self._paragraph.kvpair_count

    def __contains__(self, item):
        # type: (object) -> bool
        return self._paragraph.contains_kvpair_element(item)

    def __iter__(self):
        # type: () -> Iterator[ParagraphKey]
        keys = self._paragraph.iter_keys()
        return iter(keys)

    def __getitem__(self, item):
        # type: (ParagraphKey) -> T
        # With auto-resolving enabled, a plain field name is looked up as its
        # first occurrence, i.e. as the key (name, 0).
        if self._auto_resolve_ambiguous_fields and isinstance(item, str):
            lookup_key = (item, 0)  # type: ParagraphKey
        else:
            lookup_key = item
        kvpair = self._paragraph.get_kvpair_element(lookup_key)
        assert kvpair is not None
        return self._interpret_value(item, kvpair)

    def __delitem__(self, item):
        # type: (ParagraphKey) -> None
        self._paragraph.remove_kvpair_element(item)

    def _interpret_value(self, key, value):
        # type: (ParagraphKey, Deb822KeyValuePairElement) -> T
        raise NotImplementedError  # pragma: no cover


# Deb822ParagraphElement uses this Mixin (by having `_paragraph` return self).
# Therefore, the Mixin needs to call the "proper" methods on the paragraph to
# avoid doing infinite recursion.
+class Deb822ParagraphToStrWrapperMixin(AutoResolvingMixin[str], ABC): + + @property + def _auto_map_initial_line_whitespace(self): + # type: () -> bool + return True + + @property + def _discard_comments_on_read(self): + # type: () -> bool + return True + + @property + def _auto_map_final_newline_in_multiline_values(self): + # type: () -> bool + return True + + @property + def _preserve_field_comments_on_field_updates(self): + # type: () -> bool + return True + + def _convert_value_to_str(self, kvpair_element): + # type: (Deb822KeyValuePairElement) -> str + value_element = kvpair_element.value_element + value_entries = value_element.value_lines + if len(value_entries) == 1: + # Special case single line entry (e.g. "Package: foo") as they never + # have comments and we can do some parts more efficient. + value_entry = value_entries[0] + t = value_entry.convert_to_text() + if self._auto_map_initial_line_whitespace: + t = t.strip() + return t + + if self._auto_map_initial_line_whitespace or self._discard_comments_on_read: + converter = _convert_value_lines_to_lines( + value_entries, + self._discard_comments_on_read, + ) + + auto_map_space = self._auto_map_initial_line_whitespace + + # Because we know there are more than one line, we can unconditionally inject + # the newline after the first line + as_text = "".join( + line.strip() + "\n" if auto_map_space and i == 1 else line + for i, line in enumerate(converter, start=1) + ) + else: + # No rewrite necessary. 
def __setitem__(self, item, value):
    # type: (ParagraphKey, str) -> None
    """Set the field `item` to the string `value`

    Values without any newline are stored as simple values (when the initial
    line whitespace mapping is enabled); everything else is stored from the
    raw string, which must end with a newline (one is appended when the
    final-newline mapping is enabled).

    :raises ValueError: If the value does not end with a newline and the
      final-newline mapping is disabled.
    """
    preserve_comment = (
        self._preserve_field_comments_on_field_updates
    )  # type: Optional[bool]
    existing_comment = None
    if preserve_comment and self._auto_resolve_ambiguous_fields:
        # The set_field_* methods do not cope with ambiguous fields, so the
        # original field is resolved here.  As the comment is then known, the
        # preserve flag is cleared and the comment is passed on explicitly.
        preserve_comment = None
        lookup_key = (item, 0) if isinstance(item, str) else item
        current_kvpair = self._paragraph.get_kvpair_element(lookup_key, use_get=True)
        if current_kvpair is not None:
            existing_comment = current_kvpair.comment_element

    if self._auto_map_initial_line_whitespace:
        first_line, newline, rest = value.partition("\n")
        if not newline:
            # No newline at all: a plain single-line value.
            # NOTE(review): a value whose only newline is its final character
            # is NOT taken down this path; it continues to the raw-string
            # path below.  Confirm whether that is the intent.
            self._paragraph.set_field_to_simple_value(
                item,
                value.strip(),
                preserve_original_field_comment=preserve_comment,
                field_comment=existing_comment,
            )
            return
        # Rebuild the value, normalizing the whitespace of the first line
        # when it does not already start with a space or tab.
        if first_line and not first_line.startswith(("\t", " ")):
            value = " " + first_line.strip() + "\n" + rest
        else:
            value = first_line + "\n" + rest
    if not value.endswith("\n"):
        if not self._auto_map_final_newline_in_multiline_values:
            raise ValueError(
                "Values must end with a newline (or be single line"
                " values and use the auto whitespace mapping feature)"
            )
        value += "\n"
    self._paragraph.set_field_from_raw_string(
        item,
        value,
        preserve_original_field_comment=preserve_comment,
        field_comment=existing_comment,
    )
class Deb822InterpretingParagraphWrapper(AbstractDeb822ParagraphWrapper[T]):
    """Dict-like view of a paragraph where values are read via an Interpretation"""

    def __init__(
        self,
        paragraph,  # type: Deb822ParagraphElement
        interpretation,  # type: Interpretation[T]
        *,
        auto_resolve_ambiguous_fields=False,  # type: bool
        discard_comments_on_read=True,  # type: bool
    ):
        # type: (...) -> None
        """
        :param paragraph: The paragraph to provide a view of.
        :param interpretation: Decides how the field values are interpreted.
        :param auto_resolve_ambiguous_fields: Whether a plain field name is
          resolved to the first occurrence of that field.
        :param discard_comments_on_read: Passed on to the interpretation each
          time a field is read.
        """
        super().__init__(
            paragraph,
            auto_resolve_ambiguous_fields=auto_resolve_ambiguous_fields,
            discard_comments_on_read=discard_comments_on_read,
        )
        self._interpretation = interpretation

    def _interpret_value(self, key, value):
        # type: (ParagraphKey, Deb822KeyValuePairElement) -> T
        # Forward the configured comment handling; previously the option was
        # accepted by the constructor but never reached the interpretation.
        return self._interpretation.interpret(
            value,
            discard_comments_on_read=self._discard_comments_on_read,
        )
-> None + super().__init__( + paragraph, + auto_resolve_ambiguous_fields=auto_resolve_ambiguous_fields, + discard_comments_on_read=discard_comments_on_read, + ) + self.__auto_map_initial_line_whitespace = auto_map_initial_line_whitespace + self.__preserve_field_comments_on_field_updates = ( + preserve_field_comments_on_field_updates + ) + self.__auto_map_final_newline_in_multiline_values = ( + auto_map_final_newline_in_multiline_values + ) + + @property + def _auto_map_initial_line_whitespace(self): + # type: () -> bool + return self.__auto_map_initial_line_whitespace + + @property + def _preserve_field_comments_on_field_updates(self): + # type: () -> bool + return self.__preserve_field_comments_on_field_updates + + @property + def _auto_map_final_newline_in_multiline_values(self): + # type: () -> bool + return self.__auto_map_final_newline_in_multiline_values + + +class Deb822ParagraphElement(Deb822Element, Deb822ParagraphToStrWrapperMixin, ABC): + + @classmethod + def new_empty_paragraph(cls): + # type: () -> Deb822ParagraphElement + return Deb822NoDuplicateFieldsParagraphElement([], OrderedSet()) + + @classmethod + def from_dict(cls, mapping): + # type: (Mapping[str, str]) -> Deb822ParagraphElement + paragraph = cls.new_empty_paragraph() + for k, v in mapping.items(): + paragraph[k] = v + return paragraph + + @classmethod + def from_kvpairs(cls, kvpair_elements): + # type: (List[Deb822KeyValuePairElement]) -> Deb822ParagraphElement + if not kvpair_elements: + raise ValueError( + "A paragraph must consist of at least one field/value pair" + ) + kvpair_order = OrderedSet(kv.field_name for kv in kvpair_elements) + if len(kvpair_order) == len(kvpair_elements): + # Each field occurs at most once, which is good because that + # means it is a valid paragraph and we can use the optimized + # implementation. 
+ return Deb822NoDuplicateFieldsParagraphElement( + kvpair_elements, kvpair_order + ) + # Fallback implementation, that can cope with the repeated field names + # at the cost of complexity. + return Deb822DuplicateFieldsParagraphElement(kvpair_elements) + + @property + def has_duplicate_fields(self): + # type: () -> bool + """Tell whether this paragraph has duplicate fields""" + return False + + def as_interpreted_dict_view( + self, + interpretation, # type: Interpretation[T] + *, + auto_resolve_ambiguous_fields=True, # type: bool + ): + # type: (...) -> Deb822InterpretingParagraphWrapper[T] + r"""Provide a Dict-like view of the paragraph + + This method returns a dict-like object representing this paragraph and + is useful for accessing fields in a given interpretation. It is possible + to use multiple versions of this dict-like view with different interpretations + on the same paragraph at the same time (for different fields). + + >>> example_deb822_paragraph = ''' + ... Package: foo + ... # Field comment (because it becomes just before a field) + ... Architecture: amd64 + ... # Inline comment (associated with the next line) + ... i386 + ... # We also support arm + ... arm64 + ... armel + ... ''' + >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines()) + >>> paragraph = next(iter(dfile)) + >>> list_view = paragraph.as_interpreted_dict_view(LIST_SPACE_SEPARATED_INTERPRETATION) + >>> # With the defaults, you only deal with the semantic values + >>> # - no leading or trailing whitespace on the first part of the value + >>> list(list_view["Package"]) + ['foo'] + >>> with list_view["Architecture"] as arch_list: + ... orig_arch_list = list(arch_list) + ... 
arch_list.replace('i386', 'kfreebsd-amd64') + >>> orig_arch_list + ['amd64', 'i386', 'arm64', 'armel'] + >>> list(list_view["Architecture"]) + ['amd64', 'kfreebsd-amd64', 'arm64', 'armel'] + >>> print(paragraph.dump(), end='') + Package: foo + # Field comment (because it becomes just before a field) + Architecture: amd64 + # Inline comment (associated with the next line) + kfreebsd-amd64 + # We also support arm + arm64 + armel + >>> # Format preserved and architecture replaced + >>> with list_view["Architecture"] as arch_list: + ... # Prettify the result as sorting will cause awkward whitespace + ... arch_list.reformat_when_finished() + ... arch_list.sort() + >>> print(paragraph.dump(), end='') + Package: foo + # Field comment (because it becomes just before a field) + Architecture: amd64 + # We also support arm + arm64 + armel + # Inline comment (associated with the next line) + kfreebsd-amd64 + >>> list(list_view["Architecture"]) + ['amd64', 'arm64', 'armel', 'kfreebsd-amd64'] + >>> # Format preserved and architecture values sorted + + :param interpretation: Decides how the field values are interpreted. As an example, + use LIST_SPACE_SEPARATED_INTERPRETATION for fields such as Architecture in the + debian/control file. + :param auto_resolve_ambiguous_fields: This parameter is only relevant for paragraphs + that contain the same field multiple times (these are generally invalid). If the + caller requests an ambiguous field from an invalid paragraph via a plain field name, + the return dict-like object will refuse to resolve the field (not knowing which + version to pick). This parameter (if set to True) instead changes the error into + assuming the caller wants the *first* variant. 
+ """ + return Deb822InterpretingParagraphWrapper( + self, + interpretation, + auto_resolve_ambiguous_fields=auto_resolve_ambiguous_fields, + ) + + def configured_view( + self, + *, + discard_comments_on_read=True, # type: bool + auto_map_initial_line_whitespace=True, # type: bool + auto_resolve_ambiguous_fields=True, # type: bool + preserve_field_comments_on_field_updates=True, # type: bool + auto_map_final_newline_in_multiline_values=True, # type: bool + ): + # type: (...) -> Deb822DictishParagraphWrapper + r"""Provide a Dict[str, str]-like view of this paragraph with non-standard parameters + + This method returns a dict-like object representing this paragraph that is + optionally configured differently from the default view. + + >>> example_deb822_paragraph = ''' + ... Package: foo + ... # Field comment (because it becomes just before a field) + ... Depends: libfoo, + ... # Inline comment (associated with the next line) + ... libbar, + ... ''' + >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines()) + >>> paragraph = next(iter(dfile)) + >>> # With the defaults, you only deal with the semantic values + >>> # - no leading or trailing whitespace on the first part of the value + >>> paragraph["Package"] + 'foo' + >>> # - no inline comments in multiline values (but whitespace will be present + >>> # subsequent lines.) + >>> print(paragraph["Depends"]) + libfoo, + libbar, + >>> paragraph['Foo'] = 'bar' + >>> paragraph.get('Foo') + 'bar' + >>> paragraph.get('Unknown-Field') is None + True + >>> # But you get asymmetric behaviour with set vs. get + >>> paragraph['Foo'] = ' bar\n' + >>> paragraph['Foo'] + 'bar' + >>> paragraph['Bar'] = ' bar\n#Comment\n another value\n' + >>> # Note that the whitespace on the first line has been normalized. 
+ >>> print("Bar: " + paragraph['Bar']) + Bar: bar + another value + >>> # The comment is present (in case you where wondering) + >>> print(paragraph.get_kvpair_element('Bar').convert_to_text(), end='') + Bar: bar + #Comment + another value + >>> # On the other hand, you can choose to see the values as they are + >>> # - We will just reset the paragraph as a "nothing up my sleeve" + >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines()) + >>> paragraph = next(iter(dfile)) + >>> nonstd_dictview = paragraph.configured_view( + ... discard_comments_on_read=False, + ... auto_map_initial_line_whitespace=False, + ... # For paragraphs with duplicate fields, you can choose to get an error + ... # rather than the dict picking the first value available. + ... auto_resolve_ambiguous_fields=False, + ... auto_map_final_newline_in_multiline_values=False, + ... ) + >>> # Because we have reset the state, Foo and Bar are no longer there. + >>> 'Bar' not in paragraph and 'Foo' not in paragraph + True + >>> # We can now see the comments (discard_comments_on_read=False) + >>> # (The leading whitespace in front of "libfoo" is due to + >>> # auto_map_initial_line_whitespace=False) + >>> print(nonstd_dictview["Depends"], end='') + libfoo, + # Inline comment (associated with the next line) + libbar, + >>> # And all the optional whitespace on the first value line + >>> # (auto_map_initial_line_whitespace=False) + >>> nonstd_dictview["Package"] == ' foo\n' + True + >>> # ... which will give you symmetric behaviour with set vs. get + >>> nonstd_dictview['Foo'] = ' bar \n' + >>> nonstd_dictview['Foo'] + ' bar \n' + >>> nonstd_dictview['Bar'] = ' bar \n#Comment\n another value\n' + >>> nonstd_dictview['Bar'] + ' bar \n#Comment\n another value\n' + >>> # But then you get no help either. + >>> try: + ... nonstd_dictview["Baz"] = "foo" + ... except ValueError: + ... 
print("Rejected") + Rejected + >>> # With auto_map_initial_line_whitespace=False, you have to include minimum a newline + >>> nonstd_dictview["Baz"] = "foo\n" + >>> # The absence of leading whitespace gives you the terse variant at the expensive + >>> # readability + >>> paragraph.get_kvpair_element('Baz').convert_to_text() + 'Baz:foo\n' + >>> # But because they are views, changes performed via one view is visible in the other + >>> paragraph['Foo'] + 'bar' + >>> # The views show the values according to their own rules. Therefore, there is an + >>> # asymmetric between paragraph['Foo'] and nonstd_dictview['Foo'] + >>> # Nevertheless, you can read or write the fields via either - enabling you to use + >>> # the view that best suit your use-case for the given field. + >>> 'Baz' in paragraph and nonstd_dictview.get('Baz') is not None + True + >>> # Deletion via the view also works + >>> del nonstd_dictview['Baz'] + >>> 'Baz' not in paragraph and nonstd_dictview.get('Baz') is None + True + + + :param discard_comments_on_read: When getting a field value from the dict, + this parameter decides how in-line comments are handled. When setting + the value, inline comments are still allowed and will be retained. + However, keep in mind that this option makes getter and setter assymetric + as a "get" following a "set" with inline comments will omit the comments + even if they are there (see the code example). + :param auto_map_initial_line_whitespace: Special-case the first value line + by trimming unnecessary whitespace leaving only the value. For single-line + values, all space including newline is pruned. For multi-line values, the + newline is preserved / needed to distinguish the first line from the + following lines. When setting a value, this option normalizes the + whitespace of the initial line of the value field. + When this option is set to True makes the dictionary behave more like the + original Deb822 module. 
def order_last(self, field):
    # type: (ParagraphKey) -> None
    """Re-order the given field so it is "last" in the paragraph

    This base-class version is a placeholder only: it unconditionally raises
    NotImplementedError.  The concrete paragraph classes further down in this
    module provide the real implementations.

    :param field: Key identifying the field that should be moved.
    :raises NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError  # pragma: no cover
def set_field_to_simple_value(
    self,
    item,  # type: ParagraphKey
    simple_value,  # type: str
    *,
    preserve_original_field_comment=None,  # type: Optional[bool]
    field_comment=None,  # type: Optional[Commentish]
):
    # type: (...) -> None
    r"""Set a field in this paragraph to a single-line "word" or "phrase"

    Most callers are better off treating the paragraph as a dictionary.
    The main reason to call this method directly is that it lets you choose
    the field comment (if any).

    It is meant for "simple" fields such as "Package". Example:

    >>> example_deb822_paragraph = '''
    ... Package: foo
    ... '''
    >>> dfile = parse_deb822_file(example_deb822_paragraph.splitlines())
    >>> p = next(iter(dfile))
    >>> p.set_field_to_simple_value("Package", "mscgen")
    >>> p.set_field_to_simple_value("Architecture", "linux-any kfreebsd-any",
    ...                             field_comment=['Only ported to linux and kfreebsd'])
    >>> p.set_field_to_simple_value("Priority", "optional")
    >>> print(p.dump(), end='')
    Package: mscgen
    # Only ported to linux and kfreebsd
    Architecture: linux-any kfreebsd-any
    Priority: optional
    >>> # Values are formatted nicely by default, but it does not work with
    >>> # multi-line values
    >>> p.set_field_to_simple_value("Foo", "bar\nbin\n")
    Traceback (most recent call last):
        ...
    ValueError: Cannot use set_field_to_simple_value for values with newlines

    :param item: Name of the field to set (may be a "paragraph key" to
      pick a specific instance of a duplicated field).  It is passed on
      unchanged to set_field_from_raw_string.
    :param simple_value: The text to use as the value.  It must not contain
      newlines.  Leading and trailing whitespace is stripped; whitespace
      inside the value is kept.  A "#" inside the value is value text, not
      the start of a comment.
    :param preserve_original_field_comment: Passed on unchanged to
      set_field_from_raw_string; see that method for the semantics.
    :param field_comment: Passed on unchanged to set_field_from_raw_string;
      see that method for the semantics.
    :raises ValueError: If simple_value contains a newline.
    """
    if "\n" in simple_value:
        raise ValueError(
            "Cannot use set_field_to_simple_value for values with newlines"
        )

    trimmed = simple_value.strip()
    # A non-empty value gets exactly one space after the field separator
    # (purely cosmetic).  The trailing newline is always emitted because it
    # is required whenever another field follows this one.  An empty value
    # collapses to just the newline.
    prefix = " " + trimmed if trimmed else ""
    self.set_field_from_raw_string(
        item,
        prefix + "\n",
        preserve_original_field_comment=preserve_original_field_comment,
        field_comment=field_comment,
    )
'''.lstrip() # Remove leading newline, but *not* the trailing newline + >>> fname, new_value = raw_value.split(':', 1) + >>> p.set_field_from_raw_string(fname, new_value) + >>> print(p.dump(), end='') + Package: foo + Build-Depends: debhelper-compat (= 12), + some-other-bd, + # Comment + another-bd, + >>> # Format preserved + + :param item: Name of the field to set. If the paragraph already + contains the field, then it will be replaced. Otherwise, it is + added to the end of the paragraph. + Note this can be a "paragraph key", which enables you to control + *which* instance of a field is being replaced (in case of duplicate + fields). + :param raw_string_value: The text to use as the value. The text must + be valid deb822 syntax and is used *exactly* as it is given. + Accordingly, multi-line values must include mandatory leading space + on continuation lines, newlines after the value, etc. On the + flip-side, any optional space or comments will be included. + + Note that the first line will *never* be read as a comment (if the + first line of the value starts with a "#" then it will result + in "Field-Name:#..." which is parsed as a value starting with "#" + rather than a comment). + :param preserve_original_field_comment: If True, then if there is an + existing field and that has a comment, then the comment will remain + after this operation. This is the default is the `field_comment` + parameter is omitted. + Note that if the parameter is True and the item is ambiguous, this + will raise an AmbiguousDeb822FieldKeyError. When the parameter is + omitted, the ambiguity is resolved automatically and if the resolved + field has a comment then that will be preserved (assuming + field_comment is None). + :param field_comment: If not None, add or replace the comment for + the field. Each string in the list will become one comment + line (inserted directly before the field name). Will appear in the + same order as they do in the list. 
+ + If you want complete control over the formatting of the comments, + then ensure that each line start with "#" and end with "\\n" before + the call. Otherwise, leading/trailing whitespace is normalized + and the missing "#"/"\\n" character is inserted. + """ + + new_content = [] # type: List[str] + if preserve_original_field_comment is not None: + if field_comment is not None: + raise ValueError( + 'The "preserve_original_field_comment" conflicts with' + ' "field_comment" parameter' + ) + elif field_comment is not None: + if not isinstance(field_comment, Deb822CommentElement): + new_content.extend(_format_comment(x) for x in field_comment) + field_comment = None + preserve_original_field_comment = False + + field_name, _, _ = _unpack_key(item) + + cased_field_name = field_name + try: + original = self.get_kvpair_element(item, use_get=True) + except AmbiguousDeb822FieldKeyError: + if preserve_original_field_comment: + # If we were asked to preserve the original comment, then we + # require a strict lookup + raise + original = self.get_kvpair_element((field_name, 0), use_get=True) + + if preserve_original_field_comment is None: + # We simplify preserve_original_field_comment after the lookup of the field. + # Otherwise, we can get ambiguous key errors when updating an ambiguous field + # when the caller did not explicitly ask for that behaviour. + preserve_original_field_comment = True + + if original: + # If we already have the field, then preserve the original case + cased_field_name = original.field_name + raw = ":".join((cased_field_name, raw_string_value)) + raw_lines = raw.splitlines(keepends=True) + for i, line in enumerate(raw_lines, start=1): + if not line.endswith("\n"): + raise ValueError( + "Line {i} in new value was missing trailing newline".format(i=i) + ) + if i != 1 and line[0] not in (" ", "\t", "#"): + msg = ( + "Line {i} in new value was invalid. It must either start" + ' with " " space (continuation line) or "#" (comment line).' 
@overload
def dump(
    self, fd  # type: IO[bytes]
):
    # type: (...) -> None
    pass


@overload
def dump(self):
    # type: () -> str
    pass


def dump(
    self, fd=None  # type: Optional[IO[bytes]]
):
    # type: (...) -> Optional[str]
    """Serialise the element, either to a string or into a binary stream

    :param fd: Optional binary file-like object.  When given, the text of
      every token (as produced by iter_tokens) is UTF-8 encoded and written
      to it, one write call per token.
    :return: The concatenated token text when fd is omitted; None when the
      output was written to fd.
    """
    if fd is not None:
        for token in self.iter_tokens():
            fd.write(token.text.encode("utf-8"))
        return None
    return "".join(token.text for token in self.iter_tokens())
def order_before(self, field, reference_field):
    # type: (ParagraphKey, ParagraphKey) -> None
    """Re-order the given field so it appears directly before the reference field in the paragraph

    The reference field must be present.

    :param field: Key of the field to move.
    :param reference_field: Key of the field that `field` should be placed
      in front of.
    """
    # Both keys are reduced to their plain field name; only the first value
    # returned by _unpack_key is used here.
    unpacked_field, _, _ = _unpack_key(field, raise_if_indexed=True)
    unpacked_ref_field, _, _ = _unpack_key(reference_field, raise_if_indexed=True)
    # The actual re-ordering is delegated to the order set.
    self._kvpair_order.order_before(unpacked_field, unpacked_ref_field)
def set_kvpair_element(self, key, value):
    # type: (ParagraphKey, Deb822KeyValuePairElement) -> None
    """Insert or replace the key-value pair stored under `key`

    The key must agree with the field name carried by `value`; otherwise a
    ValueError is raised.  A previously stored element for the same field is
    detached (its parent_element is reset to None) and `value` is attached to
    this paragraph.

    :param key: The key to store the element under.
    :param value: The key-value pair element to store.
    :raises ValueError: If `key` does not match the field of `value`.
    """
    key, _, _ = _unpack_key(key, raise_if_indexed=True)
    # NOTE(review): `key` was just rebound to the first value returned by
    # _unpack_key, which the rest of this class uses as a plain field name.
    # If _unpack_key never returns a Deb822FieldNameToken in that position,
    # this branch is unreachable and the token-identity check never runs -
    # confirm against _unpack_key.
    if isinstance(key, Deb822FieldNameToken):
        if key is not value.field_token:
            raise ValueError(
                "Key is a Deb822FieldNameToken, but not *the* Deb822FieldNameToken"
                " for the value"
            )
        key = value.field_name
    else:
        if key != value.field_name:
            raise ValueError(
                "Cannot insert value under a different field value than field name"
                " from its Deb822FieldNameToken implies"
            )
        # Use the string from the Deb822FieldNameToken as we need to keep that in memory either
        # way
        key = value.field_name
    # Look up any element being replaced *before* overwriting the mapping so
    # it can be detached afterwards.
    original_value = self._kvpair_elements.get(key)
    self._full_size_cache = None
    self._kvpair_elements[key] = value
    # presumably a no-op for the ordering when the key is already present
    # (the container is an OrderedSet) - TODO confirm
    self._kvpair_order.append(key)
    if original_value is not None:
        original_value.parent_element = None
    value.parent_element = self
def _init_kvpair_fields(self, kvpairs):
    # type: (Iterable[Deb822KeyValuePairElement]) -> None
    """Populate the (empty) order list and per-field node index

    Every element is appended to the order list and the node returned by that
    append is recorded under the element's field name, so a field that occurs
    several times ends up with several nodes in appearance order.

    :param kvpairs: The key-value pair elements, in paragraph order.
    """
    # Only valid on a pristine instance; callers reset both containers first.
    assert not self._kvpair_order
    assert not self._kvpair_elements
    order = self._kvpair_order
    nodes_by_field = self._kvpair_elements
    for kvpair in kvpairs:
        name = kvpair.field_name
        node = order.append(kvpair)
        nodes_by_field.setdefault(name, []).append(node)
def order_last(self, field):
    # type: (ParagraphKey) -> None
    """Move the given field to the end of the paragraph

    When the key selects a single instance of a duplicated field, only that
    instance is moved and the per-field node list is updated so it still
    mirrors the paragraph order.  Otherwise every instance of the field is
    moved, keeping their relative order.
    """
    all_nodes, moving = self._nodes_being_relocated(field)
    # Either exactly one instance is moved, or all of them are.
    assert len(moving) == 1 or len(all_nodes) == len(moving)

    order = self._kvpair_order
    for node in moving:
        # A node that already is the tail stays where it is.
        if order.tail_node is not node:
            order.remove_node(node)
            # assertion for mypy
            assert order.tail_node is not None
            order.insert_node_after(node, order.tail_node)

    if len(moving) != 1:
        return
    moved = moving[0]
    if moved is not all_nodes[-1]:
        # The moved instance is now the last one of its field.
        all_nodes.remove(moved)
        all_nodes.append(moved)
def order_before(self, field, reference_field):
    # type: (ParagraphKey, ParagraphKey) -> None
    """Re-order the given field so it appears directly before the reference field in the paragraph

    The reference field must be present.

    :param field: Key of the field to move.  Either a single instance or all
      instances of the field are moved, depending on the key.
    :param reference_field: Key of the field to move in front of.  If it
      resolves to several instances, the first one is used as anchor.
    :raises ValueError: If the anchor node is itself among the nodes being
      moved.
    """
    nodes, nodes_being_relocated = self._nodes_being_relocated(field)
    # Either exactly one instance is moved, or all of them are.
    assert len(nodes_being_relocated) == 1 or len(nodes) == len(
        nodes_being_relocated
    )
    # For "before" we always use the "first" variant as reference in case of doubt
    _, reference_nodes = self._nodes_being_relocated(reference_field)
    reference_node = reference_nodes[0]
    if reference_node in nodes_being_relocated:
        raise ValueError("Cannot re-order a field relative to itself")

    kvpair_order = self._kvpair_order
    # Each node is re-inserted immediately in front of the anchor, so
    # iterating in the existing order preserves the relative order of the
    # moved nodes.
    for node in nodes_being_relocated:
        kvpair_order.remove_node(node)
        kvpair_order.insert_node_before(node, reference_node)

    if len(nodes_being_relocated) == 1 and len(nodes) > 1:
        # Regenerate the (new) relative field order.
        field_name = nodes_being_relocated[0].value.field_name
        self._regenerate_relative_kvapir_order(field_name)
def _regenerate_relative_kvapir_order(self, field_name):
    # type: (_strI) -> None
    """Rebuild the node list for one field from the paragraph order

    Walks every node in the order list and stores those whose element has
    the given field name, in the order they are encountered, as the new list
    for that field.

    :param field_name: The field whose node list should be rebuilt.
    """
    self._kvpair_elements[field_name] = [
        candidate
        for candidate in self._kvpair_order.iter_nodes()
        if candidate.value.field_name == field_name
    ]
def get_kvpair_element(
    self,
    item,  # type: ParagraphKey
    use_get=False,  # type: bool
):
    # type: (...) -> Optional[Deb822KeyValuePairElement]
    """Look up the key-value pair element selected by `item`

    :param item: The paragraph key to resolve.
    :param use_get: When true, behave like dict.get: an unknown field (or an
      index that is out of range) yields None.  When false, an unknown field
      is looked up with plain indexing and node resolution may raise.
    :return: The element stored in the resolved node, or None if no node was
      resolved.
    """
    key, index, name_token = _unpack_key(item)
    if use_get:
        candidates = self._kvpair_elements.get(key)
        if candidates is None:
            return None
    else:
        candidates = self._kvpair_elements[key]
    resolved = self._resolve_to_single_node(
        candidates, key, index, name_token, use_get=use_get
    )
    return None if resolved is None else resolved.value
def contains_kvpair_element(self, item):
    # type: (object) -> bool
    """Tell whether the paragraph holds a field matching `item`

    Anything that is not a str, tuple or Deb822FieldNameToken is never
    contained.  An ambiguous key counts as contained, since ambiguity means
    the field is present more than once.
    """
    if isinstance(item, (str, tuple, Deb822FieldNameToken)):
        paragraph_key = cast("ParagraphKey", item)
        try:
            element = self.get_kvpair_element(paragraph_key, use_get=True)
        except AmbiguousDeb822FieldKeyError:
            return True
        return element is not None
    return False
def remove_kvpair_element(self, key):
    # type: (ParagraphKey) -> None
    """Remove one or all instances of a field from the paragraph

    A plain field name removes every instance of the field.  A key carrying
    an index or a field name token removes only the matching instance.
    Removed elements are detached (parent_element reset to None).

    :param key: The paragraph key selecting what to remove.
    :raises KeyError: If the field is absent, if the name token does not
      belong to any instance of the field, or if the index is out of range.
    """
    key, idx, name_token = _unpack_key(key)
    field_list = self._kvpair_elements[key]

    if name_token is None and idx is None:
        self._full_size_cache = None
        # Remove all case
        for node in field_list:
            node.value.parent_element = None
            self._kvpair_order.remove_node(node)
        del self._kvpair_elements[key]
        return

    if name_token is not None:
        # Indirection between original_node and node for mypy's sake
        original_node = self._find_node_via_name_token(name_token, field_list)
        if original_node is None:
            msg = 'The field "{key}" is present but key used to access it is not.'
            raise KeyError(msg.format(key=key))
        node = original_node
    else:
        assert idx is not None
        try:
            node = field_list[idx]
        except IndexError:
            # field_list is a list, so an out-of-range index raises IndexError
            # (not KeyError); translate it into the KeyError this method
            # uses for every other "no such field instance" condition.
            msg = 'The field "{key}" is present, but the index "{idx}" was invalid.'
            raise KeyError(msg.format(key=key, idx=idx))

    self._full_size_cache = None
    if len(field_list) == 1:
        # Last remaining instance: drop the field entry entirely.
        del self._kvpair_elements[key]
    else:
        field_list.remove(node)
    node.value.parent_element = None
    self._kvpair_order.remove_node(node)
def insert(self, idx, para):
    # type: (int, Deb822ParagraphElement) -> None
    """Inserts a paragraph into the file at the given "index" of paragraphs

    Note that if the index is between two paragraphs containing a "free
    floating" comment (e.g. paragraph/start-of-file, empty line, comment,
    empty line, paragraph) then it is unspecified which "side" of the
    comment the new paragraph will appear and this may change between
    versions of python-debian.


    >>> original = '''
    ... Package: libfoo-dev
    ... Depends: libfoo1 (= ${binary:Version}), ${shlib:Depends}, ${misc:Depends}
    ... '''.lstrip()
    >>> deb822_file = parse_deb822_file(original.splitlines())
    >>> para1 = Deb822ParagraphElement.new_empty_paragraph()
    >>> para1["Source"] = "foo"
    >>> para1["Build-Depends"] = "debhelper-compat (= 13)"
    >>> para2 = Deb822ParagraphElement.new_empty_paragraph()
    >>> para2["Package"] = "libfoo1"
    >>> para2["Depends"] = "${shlib:Depends}, ${misc:Depends}"
    >>> deb822_file.insert(0, para1)
    >>> deb822_file.insert(1, para2)
    >>> expected = '''
    ... Source: foo
    ... Build-Depends: debhelper-compat (= 13)
    ...
    ... Package: libfoo1
    ... Depends: ${shlib:Depends}, ${misc:Depends}
    ...
    ... Package: libfoo-dev
    ... Depends: libfoo1 (= ${binary:Version}), ${shlib:Depends}, ${misc:Depends}
    ... '''.lstrip()
    >>> deb822_file.dump() == expected
    True

    :param idx: Zero-based position among the *paragraphs* of the file.  An
      index at or beyond the number of paragraphs appends the paragraph.
    :param para: The paragraph to insert.  It must not be attached to any
      file yet.
    :raises ValueError: If the paragraph already belongs to this or another
      file.
    """
    # Validate ownership up front.  append() performs the same check, but
    # without it here a paragraph that is already attached could be linked
    # into the middle of the file a second time.
    if para.parent_element is not None:
        if para.parent_element is self:
            raise ValueError("Paragraph is already a part of this file")
        raise ValueError("Paragraph is already part of another Deb822File")

    anchor_node = None
    self._full_size_cache = None
    if idx == 0:
        # Special-case, if idx is 0, then we insert it before everything else.
        # This is mostly a cosmetic choice for corner cases involving free-floating
        # comments in the file.
        if not self._token_and_elements:
            self.append(para)
            return
        anchor_node = self._token_and_elements.head_node
    else:
        # Locate the node of the paragraph currently occupying position idx.
        paragraphs_seen = 0
        for node in self._token_and_elements.iter_nodes():
            if isinstance(node.value, Deb822ParagraphElement):
                if paragraphs_seen == idx:
                    anchor_node = node
                    break
                paragraphs_seen += 1

    if anchor_node is None:
        # Empty list or idx after the last paragraph both degenerate into append
        self.append(para)
        return

    # Remember to inject the "separating" newline between two paragraphs.
    # The new paragraph then goes in front of that newline.
    nl_token = self._set_parent(Deb822WhitespaceToken("\n"))
    anchor_node = self._token_and_elements.insert_before(nl_token, anchor_node)
    self._token_and_elements.insert_before(self._set_parent(para), anchor_node)
'''.lstrip() + >>> deb822_file.dump() == expected + True + """ + tail_element = self._token_and_elements.tail + if paragraph.parent_element is not None: + if paragraph.parent_element is self: + raise ValueError("Paragraph is already a part of this file") + raise ValueError("Paragraph is already part of another Deb822File") + + self._full_size_cache = None + # We need a separating newline if there is not a whitespace token at the end of the file. + # Note the special case where the file ends on a comment; here we insert a whitespace too + # to be sure. Otherwise, we would have to check that there is an empty line before that + # comment and that is too much effort. + if tail_element and not isinstance(tail_element, Deb822WhitespaceToken): + self._token_and_elements.append( + self._set_parent(Deb822WhitespaceToken("\n")) + ) + self._token_and_elements.append(self._set_parent(paragraph)) + + def remove(self, paragraph): + # type: (Deb822ParagraphElement) -> None + if paragraph.parent_element is not self: + raise ValueError("Paragraph is part of a different file") + node = None + for node in self._token_and_elements.iter_nodes(): + if node.value is paragraph: + break + if node is None: + raise RuntimeError("unable to find paragraph") + self._full_size_cache = None + previous_node = node.previous_node + next_node = node.next_node + self._token_and_elements.remove_node(node) + if next_node is None: + if previous_node and isinstance(previous_node.value, Deb822WhitespaceToken): + self._token_and_elements.remove_node(previous_node) + else: + if isinstance(next_node.value, Deb822WhitespaceToken): + self._token_and_elements.remove_node(next_node) + paragraph.parent_element = None + + def _set_parent(self, t): + # type: (TE) -> TE + t.parent_element = self + return t + + def position_in_parent(self, *, skip_leading_comments: bool = True) -> Position: + # Recursive base-case + return START_POSITION + + def position_in_file(self, *, skip_leading_comments: bool = True) -> 
Position: + # By definition + return START_POSITION + + @overload + def dump( + self, fd # type: IO[bytes] + ): + # type: (...) -> None + pass + + @overload + def dump(self): + # type: () -> str + pass + + def dump( + self, fd=None # type: Optional[IO[bytes]] + ): + # type: (...) -> Optional[str] + if fd is None: + return "".join(t.text for t in self.iter_tokens()) + for token in self.iter_tokens(): + fd.write(token.text.encode("utf-8")) + return None + + +_combine_error_tokens_into_elements = combine_into_replacement( + Deb822ErrorToken, Deb822ErrorElement +) +_combine_comment_tokens_into_elements = combine_into_replacement( + Deb822CommentToken, Deb822CommentElement +) +_combine_vl_elements_into_value_elements = combine_into_replacement( + Deb822ValueLineElement, Deb822ValueElement +) +_combine_kvp_elements_into_paragraphs = combine_into_replacement( + Deb822KeyValuePairElement, + Deb822ParagraphElement, + constructor=Deb822ParagraphElement.from_kvpairs, +) + + +def _parsed_value_render_factory(discard_comments): + # type: (bool) -> Callable[[Deb822ParsedValueElement], str] + return ( + Deb822ParsedValueElement.convert_to_text_without_comments + if discard_comments + else Deb822ParsedValueElement.convert_to_text + ) + + +LIST_SPACE_SEPARATED_INTERPRETATION = ListInterpretation( + whitespace_split_tokenizer, + _parse_whitespace_list_value, + Deb822ParsedValueElement, + Deb822SemanticallySignificantWhiteSpace, + lambda: Deb822SpaceSeparatorToken(" "), + _parsed_value_render_factory, +) +LIST_COMMA_SEPARATED_INTERPRETATION = ListInterpretation( + comma_split_tokenizer, + _parse_comma_list_value, + Deb822ParsedValueElement, + Deb822CommaToken, + Deb822CommaToken, + _parsed_value_render_factory, +) +LIST_UPLOADERS_INTERPRETATION = ListInterpretation( + comma_split_tokenizer, + _parse_uploaders_list_value, + Deb822ParsedValueElement, + Deb822CommaToken, + Deb822CommaToken, + _parsed_value_render_factory, +) + + +def _non_end_of_line_token(v): + # type: (TokenOrElement) 
def _build_value_line(
    token_stream,  # type: Iterable[Union[TokenOrElement, Deb822CommentElement]]
):
    # type: (...) -> Iterable[Union[TokenOrElement, Deb822ValueLineElement]]
    """Parser helper - groups the tokens of each value line into a Deb822ValueLineElement

    This is a generator. Tokens that do not start a value line are passed
    through unchanged. A value line starts at a field separator token, at a
    continuation token, or at a comment element that is immediately followed
    by a continuation token. From that point, every token up to and including
    the terminating newline token is collected into one
    Deb822ValueLineElement.

    The field separator itself is yielded as-is before the element. A
    continuation token (and the comment in front of it, if any) is not yielded
    separately; it becomes part of the element.

    :param token_stream: The tokens/elements to process; comment tokens are
      expected to have been combined into comment elements already.
    """
    buffered_stream = BufferingIterator(token_stream)

    # Deb822ValueLineElement is a bit tricky because of how we handle whitespace
    # and comments.
    #
    # In relation to comments, then only continuation lines can have comments.
    # If there is a comment before a "K: V" line, then the comment is associated
    # with the field rather than the value.
    #
    # On the whitespace front, then we separate syntactical mandatory whitespace
    # from optional whitespace.  As an example:
    #
    # """
    # # some comment associated with the Depends field
    # Depends:_foo_$
    # # some comment associated with the line containing "bar"
    # !________bar_$
    # """
    #
    # Where "$" and "!" represents mandatory whitespace (the newline and the first
    # space are required for the file to be parsed correctly), where as "_" is
    # "optional" whitespace (from a syntactical point of view).
    #
    # This distinction enable us to facilitate APIs for easy removal/normalization
    # of redundant whitespaces without having programmers worry about trashing
    # the file.
    #
    #

    # State carried from the "start of value line" detection below into the
    # element constructed for that line; reset after each element.
    comment_element = None
    continuation_line_token = None
    token = None  # type: Optional[TokenOrElement]

    for token in buffered_stream:
        start_of_value_entry = False
        if isinstance(token, Deb822ValueContinuationToken):
            continuation_line_token = token
            start_of_value_entry = True
            # The continuation token is stored in the element; do not emit it.
            token = None
        elif isinstance(token, Deb822FieldSeparatorToken):
            # The separator is still yielded below; only what follows it on
            # the line goes into the element.
            start_of_value_entry = True
        elif isinstance(token, Deb822CommentElement):
            next_token = buffered_stream.peek()
            # If the next token is a continuation line token, then this comment
            # belong to a value and we might as well just start the value
            # parsing now.
            #
            # Note that we rely on this behaviour to avoid emitting the comment
            # token (failing to do so would cause the comment to appear twice
            # in the file).
            if isinstance(next_token, Deb822ValueContinuationToken):
                start_of_value_entry = True
                comment_element = token
                token = None
                # Use next with None to avoid raising StopIteration inside a generator
                # It won't happen, but pylint cannot see that, so we do this instead.
                continuation_line_token = cast(
                    "Deb822ValueContinuationToken", next(buffered_stream, None)
                )
                assert continuation_line_token is not None

        if token is not None:
            yield token
        if start_of_value_entry:
            # Everything up to (not including) the newline token is the line's
            # content; the newline (if any) is then consumed separately.
            tokens_in_value = list(buffered_stream.takewhile(_non_end_of_line_token))
            eol_token = cast("Deb822WhitespaceToken", next(buffered_stream, None))
            assert eol_token is None or eol_token.text == "\n"
            leading_whitespace = None
            trailing_whitespace = None
            # "Depends:\n foo" would cause tokens_in_value to be empty for the
            # first "value line" (the empty part between ":" and "\n")
            if tokens_in_value:
                # Another special-case, "Depends: \n foo" (i.e. space after colon)
                # should not introduce an IndexError
                if isinstance(tokens_in_value[-1], Deb822WhitespaceToken):
                    trailing_whitespace = cast(
                        "Deb822WhitespaceToken", tokens_in_value.pop()
                    )
                # NOTE(review): the condition inspects the *last* remaining
                # token (index -1) while the body extracts the *first* one
                # (index 0). This looks like it was meant to test index 0;
                # confirm against the consumers of Deb822ValueLineElement
                # before changing it.
                if tokens_in_value and isinstance(
                    tokens_in_value[-1], Deb822WhitespaceToken
                ):
                    leading_whitespace = cast(
                        "Deb822WhitespaceToken", tokens_in_value[0]
                    )
                    tokens_in_value = tokens_in_value[1:]
            yield Deb822ValueLineElement(
                comment_element,
                continuation_line_token,
                leading_whitespace,
                tokens_in_value,
                trailing_whitespace,
                eol_token,
            )
            # Reset the per-line state so it does not leak into the next element.
            comment_element = None
            continuation_line_token = None
+ if comment_element: + yield comment_element + error_elements = [field_name] + if separator is not None: + error_elements.append(separator) + yield Deb822ErrorElement(error_elements) + return + + if isinstance(separator, Deb822FieldSeparatorToken) and isinstance( + value_element, Deb822ValueElement + ): + yield Deb822KeyValuePairElement( + comment_element, + cast("Deb822FieldNameToken", field_name), + separator, + value_element, + ) + else: + # We had a parse error, consume until the newline. + error_tokens = [token_or_element] # type: List[TokenOrElement] + error_tokens.extend(buffered_stream.takewhile(_non_end_of_line_token)) + nl = buffered_stream.peek() + # Take the newline as well if present + if nl and isinstance(nl, Deb822NewlineAfterValueToken): + next(buffered_stream, None) + error_tokens.append(nl) + yield Deb822ErrorElement(error_tokens) + else: + # Token is not part of a field, emit it as-is + yield token_or_element + + +def _abort_on_error_tokens(sequence): + # type: (Iterable[TokenOrElement]) -> Iterable[TokenOrElement] + line_no = 1 + for token in sequence: + # We are always called while the sequence consists entirely of tokens + if token.is_error: + error_as_text = token.convert_to_text().replace("\n", "\\n") + raise SyntaxOrParseError( + 'Syntax or Parse error on or near line {line_no}: "{error_as_text}"'.format( + error_as_text=error_as_text, line_no=line_no + ) + ) + line_no += token.convert_to_text().count("\n") + yield token + + +def parse_deb822_file( + sequence, # type: Union[Iterable[Union[str, bytes]], str] + *, + accept_files_with_error_tokens=False, # type: bool + accept_files_with_duplicated_fields=False, # type: bool + encoding="utf-8", # type: str +): + # type: (...) -> Deb822FileElement + """ + + :param sequence: An iterable over lines of str or bytes (an open file for + reading will do). If line endings are provided in the input, then they + must be present on every line (except the last) will be preserved as-is. 
+ If omitted and the content is at least 2 lines, then parser will assume + implicit newlines. + :param accept_files_with_error_tokens: If True, files with critical syntax + or parse errors will be returned as "successfully" parsed. Usually, + working on files with this kind of errors are not desirable as it is + hard to make sense of such files (and they might in fact not be a deb822 + file at all). When set to False (the default) a ValueError is raised if + there is a critical syntax or parse error. + Note that duplicated fields in a paragraph is not considered a critical + parse error by this parser as the implementation can gracefully cope + with these. Use accept_files_with_duplicated_fields to determine if + such files should be accepted. + :param accept_files_with_duplicated_fields: If True, then + files containing paragraphs with duplicated fields will be returned as + "successfully" parsed even though they are invalid according to the + specification. The paragraphs will prefer the first appearance of the + field unless caller explicitly requests otherwise (e.g., via + Deb822ParagraphElement.configured_view). If False, then this method + will raise a ValueError if any duplicated fields are seen inside any + paragraph. + :param encoding: The encoding to use (this is here to support Deb822-like + APIs, new code should not use this parameter). + """ + + if isinstance(sequence, (str, bytes)): + # Match the deb822 API. + sequence = sequence.splitlines(True) + + # The order of operations are important here. As an example, + # _build_value_line assumes that all comment tokens have been merged + # into comment elements. Likewise, _build_field_and_value assumes + # that value tokens (along with their comments) have been combined + # into elements. 
+ tokens = tokenize_deb822_file( + sequence, encoding=encoding + ) # type: Iterable[TokenOrElement] + if not accept_files_with_error_tokens: + tokens = _abort_on_error_tokens(tokens) + tokens = _combine_comment_tokens_into_elements(tokens) + tokens = _build_value_line(tokens) + tokens = _combine_vl_elements_into_value_elements(tokens) + tokens = _build_field_with_value(tokens) + tokens = _combine_kvp_elements_into_paragraphs(tokens) + # Combine any free-floating error tokens into error elements. We do + # this last as it enables other parts of the parser to include error + # tokens in their error elements if they discover something is wrong. + tokens = _combine_error_tokens_into_elements(tokens) + + deb822_file = Deb822FileElement(LinkedList(tokens)) + + if not accept_files_with_duplicated_fields: + for no, paragraph in enumerate(deb822_file): + if isinstance(paragraph, Deb822DuplicateFieldsParagraphElement): + field_names = set() + dup_field = None + for field in paragraph.keys(): + field_name, _, _ = _unpack_key(field) + # assert for mypy + assert isinstance(field_name, str) + if field_name in field_names: + dup_field = field_name + break + field_names.add(field_name) + if dup_field is not None: + msg = 'Duplicate field "{dup_field}" in paragraph number {no}' + raise ValueError(msg.format(dup_field=dup_field, no=no)) + + return deb822_file + + +if __name__ == "__main__": # pragma: no cover + import doctest + + doctest.testmod() diff --git a/src/debputy/lsp/vendoring/_deb822_repro/tokens.py b/src/debputy/lsp/vendoring/_deb822_repro/tokens.py new file mode 100644 index 0000000..4e5fa16 --- /dev/null +++ b/src/debputy/lsp/vendoring/_deb822_repro/tokens.py @@ -0,0 +1,516 @@ +import re +import sys +import weakref +from weakref import ReferenceType + +from ._util import BufferingIterator +from .locatable import ( + Locatable, + START_POSITION, + Range, + ONE_CHAR_RANGE, + ONE_LINE_RANGE, + Position, +) +from debian._util import resolve_ref, _strI + +try: + from typing 
import Optional, cast, TYPE_CHECKING, Iterable, Union, Dict, Callable +except ImportError: + # pylint: disable=unnecessary-lambda-assignment + TYPE_CHECKING = False + cast = lambda t, v: v + +if TYPE_CHECKING: + from .parsing import Deb822Element + + +# Consume whitespace and a single word. +_RE_WHITESPACE_SEPARATED_WORD_LIST = re.compile( + r""" + (?P<space_before>\s*) # Consume any whitespace before the word + # The space only occurs in practise if the line starts + # with space. + + # Optionally consume a word (needed to handle the case + # when there are no words left and someone applies this + # pattern to the remaining text). This is mostly here as + # a fail-safe. + + (?P<word>\S+) # Consume the word (if present) + (?P<trailing_whitespace>\s*) # Consume trailing whitespace +""", + re.VERBOSE, +) +_RE_COMMA_SEPARATED_WORD_LIST = re.compile( + r""" + # This regex is slightly complicated by the fact that it should work with + # finditer and comsume the entire value. + # + # To do this, we structure the regex so it always starts on a comma (except + # for the first iteration, where we permit the absence of a comma) + + (?: # Optional space followed by a mandatory comma unless + # it is the start of the "line" (in which case, we + # allow the comma to be omitted) + ^ + | + (?: + (?P<space_before_comma>\s*) # This space only occurs in practise if the line + # starts with space + comma. + (?P<comma> ,) + ) + ) + + # From here it is "optional space, maybe a word and then optional space" again. One reason why + # all of it is optional is to gracefully cope with trailing commas. + (?P<space_before_word>\s*) + (?P<word> [^,\s] (?: [^,]*[^,\s])? )? # "Words" can contain spaces for comma separated list. + # But surrounding whitespace is ignored + (?P<space_after_word>\s*) +""", + re.VERBOSE, +) + +# From Policy 5.1: +# +# The field name is composed of US-ASCII characters excluding control +# characters, space, and colon (i.e., characters in the ranges U+0021 +# (!) 
through U+0039 (9), and U+003B (;) through U+007E (~), +# inclusive). Field names must not begin with the comment character +# (U+0023 #), nor with the hyphen character (U+002D -). +# +# That combines to this regex of questionable readability +_RE_FIELD_LINE = re.compile( + r""" + ^ # Start of line + (?P<field_name> # Capture group for the field name + [\x21\x22\x24-\x2C\x2F-\x39\x3B-\x7F] # First character + [\x21-\x39\x3B-\x7F]* # Subsequent characters (if any) + ) + (?P<separator> : ) + (?P<space_before_value> \s* ) + (?: # Field values are not mandatory on the same line + # as the field name. + + (?P<value> \S(?:.*\S)? ) # Values must start and end on a "non-space" + (?P<space_after_value> \s* ) # We can have optional space after the value + )? +""", + re.VERBOSE, +) + + +class Deb822Token(Locatable): + """A token is an atomic syntactical element from a deb822 file + + A file is parsed into a series of tokens. If these tokens are converted to + text in exactly the same order, you get exactly the same file - bit-for-bit. + Accordingly ever bit of text in a file must be assigned to exactly one + Deb822Token. 
+ """ + + __slots__ = ("_text", "_parent_element", "_token_size", "__weakref__") + + def __init__(self, text): + # type: (str) -> None + if text == "": # pragma: no cover + raise ValueError("Tokens must have content") + self._text = text # type: str + self._parent_element = None # type: Optional[ReferenceType['Deb822Element']] + self._token_size = None # type: Optional[Range] + self._verify_token_text() + + def __repr__(self): + # type: () -> str + return "{clsname}('{text}')".format( + clsname=self.__class__.__name__, text=self._text.replace("\n", "\\n") + ) + + def _verify_token_text(self): + # type: () -> None + if "\n" in self._text: + is_single_line_token = False + if self.is_comment or self.is_error: + is_single_line_token = True + if not is_single_line_token and not self.is_whitespace: + raise ValueError( + "Only whitespace, error and comment tokens may contain newlines" + ) + if not self.text.endswith("\n"): + raise ValueError("Tokens containing whitespace must end on a newline") + if is_single_line_token and "\n" in self.text[:-1]: + raise ValueError( + "Comments and error tokens must not contain embedded newlines" + " (only end on one)" + ) + + @property + def is_whitespace(self): + # type: () -> bool + return False + + @property + def is_comment(self): + # type: () -> bool + return False + + @property + def is_error(self): + # type: () -> bool + return False + + @property + def text(self): + # type: () -> str + return self._text + + # To support callers that want a simple interface for converting tokens and elements to text + def convert_to_text(self): + # type: () -> str + return self._text + + def size(self, *, skip_leading_comments: bool = False) -> Range: + # As tokens are an atomtic unit + token_size = self._token_size + if token_size is not None: + return token_size + token_len = len(self._text) + if token_len == 1: + # The indirection with `r` because mypy gets confused and thinks that `token_size` + # cannot have any type at all. 
+ token_size = ONE_CHAR_RANGE if self._text != "\n" else ONE_LINE_RANGE + else: + new_lines = self._text.count("\n") + assert not new_lines or self._text[-1] == "\n" + end_pos = Position(new_lines, 0) if new_lines else Position(0, token_len) + token_size = Range(START_POSITION, end_pos) + self._token_size = token_size + return token_size + + @property + def parent_element(self): + # type: () -> Optional[Deb822Element] + return resolve_ref(self._parent_element) + + @parent_element.setter + def parent_element(self, new_parent): + # type: (Optional[Deb822Element]) -> None + self._parent_element = ( + weakref.ref(new_parent) if new_parent is not None else None + ) + + def clear_parent_if_parent(self, parent): + # type: (Deb822Element) -> None + if parent is self.parent_element: + self._parent_element = None + + +class Deb822WhitespaceToken(Deb822Token): + """The token is a kind of whitespace. + + Some whitespace tokens are critical for the format (such as the Deb822ValueContinuationToken, + spaces that separate words in list separated by spaces or newlines), while other whitespace + tokens are truly insignificant (space before a newline, space after a comma in a comma + list, etc.). + """ + + __slots__ = () + + @property + def is_whitespace(self): + # type: () -> bool + return True + + +class Deb822SemanticallySignificantWhiteSpace(Deb822WhitespaceToken): + """Whitespace that (if removed) would change the meaning of the file (or cause syntax errors)""" + + __slots__ = () + + +class Deb822NewlineAfterValueToken(Deb822SemanticallySignificantWhiteSpace): + """The newline after a value token. + + If not followed by a continuation token, this also marks the end of the field. 
+ """ + + __slots__ = () + + def __init__(self): + # type: () -> None + super().__init__("\n") + + +class Deb822ValueContinuationToken(Deb822SemanticallySignificantWhiteSpace): + """The whitespace denoting a value spanning an additional line (the first space on a line)""" + + __slots__ = () + + +class Deb822SpaceSeparatorToken(Deb822SemanticallySignificantWhiteSpace): + """Whitespace between values in a space list (e.g. "Architectures")""" + + __slots__ = () + + +class Deb822ErrorToken(Deb822Token): + """Token that represents a syntactical error""" + + __slots__ = () + + @property + def is_error(self): + # type: () -> bool + return True + + +class Deb822CommentToken(Deb822Token): + + __slots__ = () + + @property + def is_comment(self): + # type: () -> bool + return True + + +class Deb822FieldNameToken(Deb822Token): + + __slots__ = () + + def __init__(self, text): + # type: (str) -> None + if not isinstance(text, _strI): + text = _strI(sys.intern(text)) + super().__init__(text) + + @property + def text(self): + # type: () -> _strI + return cast("_strI", self._text) + + +# The colon after the field name, parenthesis, etc. 
def tokenize_deb822_file(sequence, encoding="utf-8"):
    # type: (Iterable[Union[str, bytes]], str) -> Iterable[Deb822Token]
    """Tokenize a deb822 file

    This is a generator that processes the input one line at a time and
    classifies each line as follows:

    * whitespace-only lines: consecutive ones are merged into a single
      Deb822WhitespaceToken, and the current field (if any) is ended;
    * lines starting with "#": one Deb822CommentToken;
    * lines starting with a space or tab: a continuation line (continuation
      token, value token, newline token) when a field is open, otherwise a
      Deb822ErrorToken;
    * lines matching the field-line pattern: field name token, separator
      token, optional whitespace, optional value, optional whitespace and a
      newline token;
    * anything else: a Deb822ErrorToken.

    :param sequence: An iterable of lines (a file open for reading will do).
      Lines given as bytes are decoded with the given encoding and a newline
      is appended to any line that lacks one.
    :param encoding: The encoding to use (this is here to support Deb822-like
       APIs, new code should not use this parameter).
    """
    # The name of the field currently being parsed (None when outside a field).
    # Decides whether an indented line is a continuation line or an error.
    current_field_name = None
    # Maps the field name as written to its _strI instance so repeated field
    # names share one object.
    field_name_cache = {}  # type: Dict[str, _strI]

    def _normalize_input(s):
        # type: (Iterable[Union[str, bytes]]) -> Iterable[str]
        # Yields every input line as str, guaranteed to end with a newline.
        for x in s:
            if isinstance(x, bytes):
                x = x.decode(encoding)
            if not x.endswith("\n"):
                # We always end on a newline because it makes a lot of code simpler.  The pain
                # points relates to mutations that add content after the last field.  Sadly, these
                # mutations can happen via adding fields, reordering fields, etc. and are too hard
                # to track to make it worth it to support the special case that makes up missing
                # a newline at the end of the file.
                x += "\n"
            yield x

    text_stream = BufferingIterator(
        _normalize_input(sequence)
    )  # type: BufferingIterator[str]

    for line in text_stream:
        if line.isspace():
            if current_field_name:
                # Blank lines terminate fields
                current_field_name = None

            # If there are multiple whitespace-only lines, we combine them
            # into one token.
            r = list(text_stream.takewhile(str.isspace))
            if r:
                line += "".join(r)

            # whitespace tokens are likely to have duplicate cases (like
            # single newline tokens), so we intern the strings there.
            yield Deb822WhitespaceToken(sys.intern(line))
            continue

        # Lines are never empty here (a newline is always present), so
        # indexing the first character is safe.
        if line[0] == "#":
            yield Deb822CommentToken(line)
            continue

        if line[0] in (" ", "\t"):
            if current_field_name is not None:
                # We emit a separate whitespace token for the newline as it makes some
                # things easier later (see _build_value_line)
                leading = sys.intern(line[0])
                # Pull out the leading space and newline
                line = line[1:-1]
                yield Deb822ValueContinuationToken(leading)
                yield Deb822ValueToken(line)
                yield Deb822NewlineAfterValueToken()
            else:
                # An indented, non-blank line outside of a field.
                yield Deb822ErrorToken(line)
            continue

        field_line_match = _RE_FIELD_LINE.match(line)
        if field_line_match:
            # The line is a field, which means there is a bit to unpack
            # - note that by definition, leading and trailing whitespace is insignificant
            #   on the value part directly after the field separator
            (field_name, _, space_before, value, space_after) = (
                field_line_match.groups()
            )

            # A cache miss (None) is resolved further down, where the _strI
            # instance is created and stored.
            current_field_name = field_name_cache.get(field_name)

            if value is None or value == "":
                # If there is no value, then merge the two space elements into space_after
                # as it makes it easier to handle the newline.
                space_after = (
                    space_before + space_after if space_after else space_before
                )
                space_before = ""

            if space_after:
                # We emit a separate whitespace token for the newline as it makes some
                # things easier later (see _build_value_line)
                if space_after.endswith("\n"):
                    space_after = space_after[:-1]

            if current_field_name is None:
                field_name = sys.intern(field_name)
                current_field_name = _strI(field_name)
                field_name_cache[field_name] = current_field_name

            # We use current_field_name from here as it is a _strI.
            # Delete field_name to avoid accidentally using it and getting bugs
            # that should not happen.
            del field_name

            yield Deb822FieldNameToken(current_field_name)
            yield Deb822FieldSeparatorToken()
            if space_before:
                yield Deb822WhitespaceToken(sys.intern(space_before))
            if value:
                yield Deb822ValueToken(value)
            if space_after:
                yield Deb822WhitespaceToken(sys.intern(space_after))
            yield Deb822NewlineAfterValueToken()
        else:
            # Neither blank, comment, continuation nor field line.
            yield Deb822ErrorToken(line)
@_value_line_tokenizer
def comma_split_tokenizer(v):
    # type: (str) -> Iterable[Deb822Token]
    """Tokenize one line of a comma separated value

    For every match of the comma-list pattern, this emits (in order, each
    only when non-empty): whitespace before the comma, the comma itself,
    whitespace before the word, the word and whitespace after the word.

    :param v: The text of a single line; it must not contain a newline.
    """
    assert "\n" not in v
    for match in _RE_COMMA_SEPARATED_WORD_LIST.finditer(v):
        pre_comma_space = match.group("space_before_comma")
        if pre_comma_space:
            yield Deb822WhitespaceToken(sys.intern(pre_comma_space))
        if match.group("comma"):
            yield Deb822CommaToken()
        pre_word_space = match.group("space_before_word")
        if pre_word_space:
            yield Deb822WhitespaceToken(sys.intern(pre_word_space))
        word = match.group("word")
        if word:
            yield Deb822ValueToken(word)
        post_word_space = match.group("space_after_word")
        if post_word_space:
            yield Deb822WhitespaceToken(sys.intern(post_word_space))
+ ParagraphKeyBase = Union["Deb822FieldNameToken", str] + + ParagraphKey = Union[ParagraphKeyBase, Tuple[str, int]] + + Commentish = Union[List[str], "Deb822CommentElement"] + + FormatterCallback = Callable[ + [str, "FormatterContentToken", Iterator["FormatterContentToken"]], + Iterator[Union["FormatterContentToken", str]], + ] + try: + # Set __doc__ attributes if possible + TE.__doc__ = """ + Generic "Token or Element" type + """ + R.__doc__ = """ + For internal usage in _deb822_repro + """ + VE.__doc__ = """ + Value type/element in a list interpretation of a field value + """ + ST.__doc__ = """ + Separator type/token in a list interpretation of a field value + """ + ParagraphKeyBase.__doc__ = """ + For internal usage in _deb822_repro + """ + ParagraphKey.__doc__ = """ + Anything accepted as a key for a paragraph field lookup. The simple case being + a str. Alternative variants are mostly interesting for paragraphs with repeated + fields (to enable unambiguous lookups) + """ + Commentish.__doc__ = """ + Anything accepted as input for a Comment. The simple case is the list + of string (each element being a line of comment). The alternative format is + there for enable reuse of an existing element (e.g. to avoid "unpacking" + only to "re-pack" an existing comment element). 
+ """ + FormatterCallback.__doc__ = """\ + Formatter callback used with the round-trip safe parser + + See debian._repro_deb822.formatter.format_field for details + """ + except AttributeError: + # Python 3.5 does not allow update to the __doc__ attribute - ignore that + pass +except ImportError: + pass + + +class AmbiguousDeb822FieldKeyError(KeyError): + """Specialized version of KeyError to denote a valid but ambiguous field name + + This exception occurs if: + * the field is accessed via a str on a configured view that does not automatically + resolve ambiguous field names (see Deb822ParagraphElement.configured_view), AND + * a concrete paragraph contents a repeated field (which is not valid in deb822 + but the module supports parsing them) + + Note that the default is to automatically resolve ambiguous fields. Accordingly + you will only see this exception if you have "opted in" on wanting to know that + the lookup was ambiguous. + + The ambiguity can be resolved by using a tuple of (<field-name>, <filed-index>) + instead of <field-name>. 
class MaintscriptSnippetContainer:
    """Ordered collection of maintscript snippets for one maintainer script

    Snippets without a `snippet_order` are kept in one "generic" list. Snippets
    with a `snippet_order` are kept in a separate list per order value. Within
    each list, insertion order is preserved.
    """

    def __init__(self) -> None:
        self._generic_snippets: List["MaintscriptSnippet"] = []
        self._snippets_by_order: Dict[
            Literal["service"], List["MaintscriptSnippet"]
        ] = {}

    def copy(self) -> "MaintscriptSnippetContainer":
        """Create an independent copy of this container

        The snippet lists are duplicated (the snippets themselves are shared),
        so appending to the copy never affects the original and vice versa.
        """
        instance = self.__class__()
        instance._generic_snippets = self._generic_snippets.copy()
        # A plain dict.copy() would share the per-order lists between both
        # containers; each list must be copied individually.
        instance._snippets_by_order = {
            order: snippets.copy()
            for order, snippets in self._snippets_by_order.items()
        }
        return instance

    def append(self, maintscript_snippet: "MaintscriptSnippet") -> None:
        """Add a snippet to the list matching its `snippet_order`"""
        order = maintscript_snippet.snippet_order
        if order is None:
            self._generic_snippets.append(maintscript_snippet)
        else:
            self._snippets_by_order.setdefault(order, []).append(maintscript_snippet)

    def has_content(self, snippet_order: Optional[Literal["service"]] = None) -> bool:
        """Check whether any snippet is registered for the given order

        :param snippet_order: The order to check; None means the generic snippets.
        """
        if snippet_order is None:
            return bool(self._generic_snippets)
        return bool(self._snippets_by_order.get(snippet_order))

    def all_snippets(self) -> Iterable["MaintscriptSnippet"]:
        """Iterate over the generic snippets followed by all ordered snippets"""
        yield from self._generic_snippets
        for snippets in self._snippets_by_order.values():
            yield from snippets

    def generate_snippet(
        self,
        tool_with_version: Optional[str] = None,
        snippet_order: Optional[Literal["service"]] = None,
        reverse: bool = False,
    ) -> Optional[str]:
        """Render the snippets for one order into a single script fragment

        :param tool_with_version: When given (and non-empty), the content is wrapped
          in "Automatically added by" / "End automatically added section" markers.
        :param snippet_order: Which snippets to render; None means the generic ones.
        :param reverse: Render the snippets in reverse insertion order.
        :return: The rendered fragment, or None if there is nothing to render.
        """
        if snippet_order is None:
            snippets = self._generic_snippets
        else:
            snippets = self._snippets_by_order.get(snippet_order, [])
        ordered = reversed(snippets) if reverse else snippets
        inner_content = "".join(s.script_content() for s in ordered)

        if not inner_content:
            return None

        if tool_with_version:
            return (
                f"# Automatically added by {tool_with_version}\n"
                + inner_content
                + "# End automatically added section"
            )
        return inner_content
cmdline.append(owning_package) + return cls( + tuple(cmdline), + definition_source, + ) + + @classmethod + def rm_conffile( + cls, + definition_source: AttributePath, + conffile: str, + prior_version: Optional[str] = None, + owning_package: Optional[str] = None, + ) -> Self: + cmdline = ["rm_conffile", conffile] + return cls._finish_cmd( + definition_source.path, cmdline, prior_version, owning_package + ) + + @classmethod + def mv_conffile( + cls, + definition_source: AttributePath, + old_conffile: str, + new_confile: str, + prior_version: Optional[str] = None, + owning_package: Optional[str] = None, + ) -> Self: + cmdline = ["mv_conffile", old_conffile, new_confile] + return cls._finish_cmd( + definition_source.path, cmdline, prior_version, owning_package + ) + + @classmethod + def symlink_to_dir( + cls, + definition_source: AttributePath, + pathname: str, + old_target: str, + prior_version: Optional[str] = None, + owning_package: Optional[str] = None, + ) -> Self: + cmdline = ["symlink_to_dir", pathname, old_target] + return cls._finish_cmd( + definition_source.path, cmdline, prior_version, owning_package + ) + + @classmethod + def dir_to_symlink( + cls, + definition_source: AttributePath, + pathname: str, + new_target: str, + prior_version: Optional[str] = None, + owning_package: Optional[str] = None, + ) -> Self: + cmdline = ["dir_to_symlink", pathname, new_target] + return cls._finish_cmd( + definition_source.path, cmdline, prior_version, owning_package + ) diff --git a/src/debputy/manifest_conditions.py b/src/debputy/manifest_conditions.py new file mode 100644 index 0000000..0f5c298 --- /dev/null +++ b/src/debputy/manifest_conditions.py @@ -0,0 +1,239 @@ +import dataclasses +from enum import Enum +from typing import List, Callable, Optional, Sequence + +from debian.debian_support import DpkgArchTable + +from debputy._deb_options_profiles import DebBuildOptionsAndProfiles +from debputy.architecture_support import DpkgArchitectureBuildProcessValuesTable +from 
class ManifestCondition(DebputyDispatchableType):
    """Base class for conditions that can be evaluated against a ConditionContext

    Subclasses must implement `describe` and `evaluate`. The class methods act
    as factories for grouped conditions and for the predefined conditions.
    """

    __slots__ = ()

    def describe(self) -> str:
        """Provide a human readable description of the condition"""
        raise NotImplementedError

    def negated(self) -> "ManifestCondition":
        """Provide a condition that evaluates to the opposite of this one"""
        return NegatedManifestCondition(self)

    def evaluate(self, context: ConditionContext) -> bool:
        """Determine whether the condition holds in the given context"""
        raise NotImplementedError

    @classmethod
    def _manifest_group(
        cls,
        match_type: "_ConditionGroupMatchType",
        conditions: "Sequence[ManifestCondition]",
    ) -> "ManifestCondition":
        first = conditions[0]
        if (
            not isinstance(first, ManifestConditionGroup)
            or first.match_type != match_type
        ):
            return ManifestConditionGroup(match_type, conditions)
        # The first condition is already a group of the same kind; fold the
        # remaining conditions into it rather than nesting groups.
        return first.extend(conditions[1:])

    @classmethod
    def any_of(cls, conditions: "Sequence[ManifestCondition]") -> "ManifestCondition":
        """Group the conditions such that at least one of them must hold"""
        return cls._manifest_group(_ConditionGroupMatchType.ANY_OF, conditions)

    @classmethod
    def all_of(cls, conditions: "Sequence[ManifestCondition]") -> "ManifestCondition":
        """Group the conditions such that all of them must hold"""
        return cls._manifest_group(_ConditionGroupMatchType.ALL_OF, conditions)

    @classmethod
    def is_cross_building(cls) -> "ManifestCondition":
        """Provide the predefined "is cross building" condition"""
        return _IS_CROSS_BUILDING

    @classmethod
    def can_execute_compiled_binaries(cls) -> "ManifestCondition":
        """Provide the predefined "can execute compiled binaries" condition"""
        return _CAN_EXECUTE_COMPILED_BINARIES

    @classmethod
    def run_build_time_tests(cls) -> "ManifestCondition":
        """Provide the predefined "run build time tests" condition"""
        return _RUN_BUILD_TIME_TESTS
class ManifestConditionGroup(ManifestCondition):
    """A set of conditions combined via a match type (any-of / all-of)

    Describing and evaluating the group is delegated to the match type, which
    receives the conditions of the group.
    """

    __slots__ = ("match_type", "_conditions")

    def __init__(
        self,
        match_type: _ConditionGroupMatchType,
        conditions: Sequence[ManifestCondition],
    ) -> None:
        self.match_type = match_type
        self._conditions = conditions

    def describe(self) -> str:
        group_members = self._conditions
        return self.match_type.describe(group_members)

    def evaluate(self, context: ConditionContext) -> bool:
        group_members = self._conditions
        return self.match_type.evaluate(group_members, context)

    def extend(
        self,
        conditions: Sequence[ManifestCondition],
    ) -> "ManifestConditionGroup":
        """Create a new group with the same match type and the extra conditions appended

        This group is left unmodified.
        """
        merged = [*self._conditions, *conditions]
        return ManifestConditionGroup(self.match_type, merged)
class BinaryPackageContextArchMatchManifestCondition(ArchMatchManifestConditionBase):
    """Architecture match that uses the resolved architecture of the binary package"""

    def describe(self) -> str:
        arch_list = ", ".join(self._arch_spec)
        if not self._is_negated:
            return f"architecture (for binary package) matches any of [{arch_list}]"
        return f"architecture (for binary package) matches *none* of [{arch_list}]"

    def evaluate(self, context: ConditionContext) -> bool:
        """Match the binary package's architecture against the architecture spec

        :raises RuntimeError: If the context does not have a binary package.
        """
        package = context.binary_package
        if package is None:
            raise RuntimeError(
                "Condition only applies in the context of a BinaryPackage, but was evaluated"
                " without one"
            )
        package_arch = package.resolved_architecture
        is_concerned = context.dpkg_arch_query_table.architecture_is_concerned(
            package_arch, self._arch_spec
        )
        if self._is_negated:
            return not is_concerned
        return is_concerned
# Predefined conditions. Each of these is a stateless singleton that is handed
# out by the corresponding factory method (where one exists).
_IS_CROSS_BUILDING = _SingletonCondition(
    "Cross Compiling (i.e., DEB_HOST_GNU_TYPE != DEB_BUILD_GNU_TYPE)",
    lambda c: c.dpkg_architecture_variables.is_cross_compiling,
)

_CAN_EXECUTE_COMPILED_BINARIES = _SingletonCondition(
    "Can run built binaries (natively or via transparent emulation)",
    _can_run_built_binaries,
)

_RUN_BUILD_TIME_TESTS = _SingletonCondition(
    "Run build time tests",
    lambda c: "nocheck" not in c.build_env.deb_build_options,
)

# The DEB_BUILD_OPTIONS tag for skipping documentation is "nodoc" (singular)
# in the Debian Policy Manual; "nodocs" is not a recognised tag.
_BUILD_DOCS_BDO = _SingletonCondition(
    "Build docs (nodoc not in DEB_BUILD_OPTIONS)",
    lambda c: "nodoc" not in c.build_env.deb_build_options,
)


# The helpers are only needed for creating the singletons above; remove them
# from the module namespace.
del _SingletonCondition
del _can_run_built_binaries
def _parse_ownership(
    v: Union[str, int],
    attribute_path: AttributePath,
) -> Tuple[Optional[str], Optional[int]]:
    """Split a manifest ownership value into its name and id parts

    Accepted forms are an int (id only), a plain string (name only) or a
    string of the form "name:id" where either side (but not both) may be blank.

    :param v: The raw value from the manifest.
    :param attribute_path: Where the value was defined (used in error messages).
    :return: A tuple of (name, id), where a part that was not provided is None.
    :raises ManifestParseException: If the value is ":", has more than one ":",
      has a non-integer id part, or is a string consisting only of digits.
    """
    if isinstance(v, str) and ":" in v:
        if v == ":":
            raise ManifestParseException(
                f'Invalid ownership value "{v}" at {attribute_path.path}: Ownership is redundant if it is ":"'
                f" (blank name and blank id). Please provide non-default values or remove the definition."
            )
        entity_name: Optional[str]
        entity_id: Optional[int]
        entity_name, _, entity_id_str = v.partition(":")
        if ":" in entity_id_str:
            # Previously surfaced as an unhandled ValueError from tuple unpacking
            raise ManifestParseException(
                f'Invalid ownership value "{v}" at {attribute_path.path}: The value must contain at most'
                ' one ":" (separating the name from the id).'
            )
        if entity_name == "":
            entity_name = None
        if entity_id_str != "":
            try:
                entity_id = int(entity_id_str)
            except ValueError as e:
                raise ManifestParseException(
                    f'Invalid ownership value "{v}" at {attribute_path.path}: The part after the ":"'
                    f' ("{entity_id_str}") must be an integer id.'
                ) from e
        else:
            entity_id = None
        return entity_name, entity_id

    if isinstance(v, int):
        return None, v
    if v.isdigit():
        raise ManifestParseException(
            f'Invalid ownership value "{v}" at {attribute_path.path}: The provided value "{v}" is a string (implying'
            " name lookup), but it contains an integer (implying id lookup). Please use a regular int for id lookup"
            f' (removing the quotes) or add a ":" in the end ("{v}:") as a disambiguation if you are *really* looking'
            " for an entity with that name."
        )
    return v, None
@dataclasses.dataclass(slots=True, frozen=True)
class SymbolicMode(FileSystemMode):
    """File system mode given in symbolic form

    The mode is applied relative to the current mode of the path by applying
    each parsed segment in order.
    """

    # The mode exactly as it was written in the manifest
    provided_mode: str
    # The parsed segments of the mode (applied in order by compute_mode)
    segments: Sequence[_SymbolicModeSegment]

    @classmethod
    def parse_filesystem_mode(
        cls,
        mode_raw: str,
        attribute_path: AttributePath,
    ) -> "SymbolicMode":
        """Parse a symbolic mode via parse_symbolic_mode

        :param mode_raw: The symbolic mode as provided in the manifest.
        :param attribute_path: Where the mode was defined.
        """
        segments = list(parse_symbolic_mode(mode_raw, attribute_path))
        return SymbolicMode(mode_raw, segments)

    def __str__(self) -> str:
        return self.symbolic_mode()

    @property
    def is_symbolic_mode(self) -> bool:
        # This *is* the symbolic mode implementation (mirrors OctalMode.is_octal_mode)
        return True

    def symbolic_mode(self) -> str:
        """Provide the mode as it was written in the manifest"""
        return self.provided_mode

    def compute_mode(self, current_mode: int, is_dir: bool) -> int:
        """Compute the resulting mode by applying every segment to the current mode

        :param current_mode: The mode of the path before applying this mode.
        :param is_dir: Whether the path is a directory (passed on to each segment).
        """
        final_mode = current_mode
        for segment in self.segments:
            final_mode = segment.apply(final_mode, is_dir)
        return final_mode
@dataclasses.dataclass(slots=True, frozen=True)
class _StaticFileSystemOwnerGroup:
    """Common base for a static owner or group resolved via the base-passwd tables

    Subclasses select the kind of entity via `_owner_type` and the table to
    use via `_ownership_table_name`.
    """

    # The resolved (name, id) pair
    ownership_definition: OwnershipDefinition

    @property
    def entity_name(self) -> str:
        return self.ownership_definition.entity_name

    @property
    def entity_id(self) -> int:
        return self.ownership_definition.entity_id

    @classmethod
    def from_manifest_value(
        cls,
        raw_input: Union[str, int],
        attribute_path: AttributePath,
    ) -> "_StaticFileSystemOwnerGroup":
        """Parse and resolve an ownership value from the manifest

        :param raw_input: The value as provided in the manifest.
        :param attribute_path: Where the value was defined (used in error messages).
        :raises ManifestParseException: If the value cannot be parsed or resolved, or
          if it resolves to an entity listed in BAD_OWNER_NAMES / BAD_OWNER_IDS.
        """
        provided_name, provided_id = _parse_ownership(raw_input, attribute_path)
        owner_def = cls._resolve(raw_input, provided_name, provided_id, attribute_path)
        if (
            owner_def.entity_name in BAD_OWNER_NAMES
            or owner_def.entity_id in BAD_OWNER_IDS
        ):
            raise ManifestParseException(
                f'Refusing to use "{raw_input}" as {cls._owner_type()} (defined at {attribute_path.path})'
                f' as it resolves to "{owner_def.entity_name}:{owner_def.entity_id}" and no path should have this'
                f" entity as {cls._owner_type()} as it is unsafe."
            )
        return cls(owner_def)

    @classmethod
    def _resolve(
        cls,
        raw_input: Union[str, int],
        provided_name: Optional[str],
        provided_id: Optional[int],
        attribute_path: AttributePath,
    ) -> OwnershipDefinition:
        """Look up the provided name and/or id in the relevant ownership table

        When both a name and an id are provided, they must refer to the same
        entity in the table.

        :raises ManifestParseException: If neither part is known, or if the two
          parts do not agree with each other.
        """
        table_name = cls._ownership_table_name()
        name_table, id_table = _load_ownership_table_from_file(table_name)
        name_match = (
            name_table.get(provided_name) if provided_name is not None else None
        )
        id_match = id_table.get(provided_id) if provided_id is not None else None
        if id_match is None and name_match is None:
            name_part = provided_name if provided_name is not None else "N/A"
            id_part = provided_id if provided_id is not None else "N/A"
            raise ManifestParseException(
                f'Cannot resolve "{raw_input}" as {cls._owner_type()} (from {attribute_path.path}):'
                f" It is not known to be a static {cls._owner_type()} from base-passwd."
                f' The value was interpreted as name: "{name_part}" and id: {id_part}'
            )
        if id_match is None:
            assert name_match is not None
            return name_match
        if name_match is None:
            assert id_match is not None
            return id_match
        if provided_name != id_match.entity_name:
            raise ManifestParseException(
                f"Bad {cls._owner_type()} declaration: The id {provided_id} resolves to {id_match.entity_name}"
                f" according to base-passwd, but the packager declared to should have been {provided_name}"
                f" at {attribute_path.path}"
            )
        if provided_id != name_match.entity_id:
            # Note the call: without the parentheses, the message would contain
            # the repr of the bound method rather than "owner" / "group".
            raise ManifestParseException(
                f"Bad {cls._owner_type()} declaration: The name {provided_name} resolves to {name_match.entity_id}"
                f" according to base-passwd, but the packager declared to should have been {provided_id}"
                f" at {attribute_path.path}"
            )
        return id_match

    @classmethod
    def _owner_type(cls) -> Literal["owner", "group"]:
        """The kind of entity (used in error messages); provided by subclasses"""
        raise NotImplementedError

    @classmethod
    def _ownership_table_name(cls) -> Literal["passwd.master", "group.master"]:
        """The base-passwd table to resolve against; provided by subclasses"""
        raise NotImplementedError
class FileSystemMatchRule:
    """Base type for a manifest value that matches paths in the file system

    The properties are placeholders that concrete subclasses provide as fields.
    """

    @property
    def raw_match_rule(self) -> str:
        raise NotImplementedError

    @property
    def attribute_path(self) -> AttributePath:
        raise NotImplementedError

    @property
    def match_rule(self) -> MatchRule:
        raise NotImplementedError

    @classmethod
    def parse_path_match(
        cls,
        raw_match_rule: str,
        attribute_path: AttributePath,
        parser_context: "ParserContextData",
    ) -> "FileSystemMatchRule":
        """Parse a match rule using the substitution from the parser context"""
        return cls.from_path_match(
            raw_match_rule, attribute_path, parser_context.substitution
        )

    @classmethod
    def from_path_match(
        cls,
        raw_match_rule: str,
        attribute_path: AttributePath,
        substitution: "Substitution",
    ) -> "FileSystemMatchRule":
        """Parse a path or glob into a match rule

        :param raw_match_rule: The path or glob as provided in the manifest.
        :param attribute_path: Where the value was defined (used in error messages).
        :param substitution: Substitution passed on to MatchRule.from_path_or_glob.
        :return: A FileSystemExactMatchRule when the parsed rule is an
          ExactFileSystemPath; otherwise a FileSystemGenericMatch.
        :raises ManifestParseException: If MatchRule.from_path_or_glob rejects the
          value with a ValueError.
        """
        try:
            mr = MatchRule.from_path_or_glob(
                raw_match_rule,
                attribute_path.path,
                substitution=substitution,
            )
        except ValueError as e:
            # Chain the cause explicitly (as the other parsers in this module do)
            raise ManifestParseException(
                f'Could not parse "{raw_match_rule}" (defined at {attribute_path.path})'
                f" as a path or a glob: {e.args[0]}"
            ) from e

        if isinstance(mr, ExactFileSystemPath):
            return FileSystemExactMatchRule(
                raw_match_rule,
                attribute_path,
                mr,
            )
        return FileSystemGenericMatch(
            raw_match_rule,
            attribute_path,
            mr,
        )
# Typo detection relies on the optional Levenshtein module. Without it, a no-op
# variant (which emits a one-time hint) is defined instead.
try:
    from Levenshtein import distance
except ImportError:
    # Whether the hint about installing python3-levenshtein has been emitted
    _WARN_ONCE = False

    def _detect_possible_typo(
        _key: str,
        _value: object,
        _manifest_attributes: Mapping[str, "AttributeDescription"],
        _path: "AttributePath",
    ) -> None:
        """Fallback without typo detection; emits an informational hint once"""
        global _WARN_ONCE
        if not _WARN_ONCE:
            _WARN_ONCE = True
            _info(
                "Install python3-levenshtein to have debputy try to detect typos in the manifest."
            )

else:

    def _detect_possible_typo(
        key: str,
        value: object,
        manifest_attributes: Mapping[str, "AttributeDescription"],
        path: "AttributePath",
    ) -> None:
        """Warn if `key` looks like a typo of one of the known attribute names

        :param key: The key to check.
        :param value: The value provided for the key (used for ranking candidates).
        :param manifest_attributes: The known attributes by name.
        :param path: The attribute path of the mapping containing the key.
        """
        k_len = len(key)
        key_path = path[key]
        matches: List[str] = []
        # Strength of the candidates currently in `matches`:
        #   2: the value passes the candidate's type validation
        #   1: validation failed, but the value matches the candidate's base type
        #   0: the value does not fit the candidate
        current_match_strength = 0
        for acceptable_key, attr in manifest_attributes.items():
            # Cheap pre-filter: a length difference above 2 implies a distance above 2
            if abs(k_len - len(acceptable_key)) > 2:
                continue
            d = distance(key, acceptable_key)
            if d > 2:
                continue
            try:
                attr.type_validator.ensure_type(value, key_path)
            except ManifestParseException:
                if attr.type_validator.base_type_match(value):
                    match_strength = 1
                else:
                    match_strength = 0
            else:
                match_strength = 2

            # Only keep the candidates with the highest strength seen so far
            if match_strength < current_match_strength:
                continue
            if match_strength > current_match_strength:
                current_match_strength = match_strength
                matches.clear()
            matches.append(acceptable_key)

        if not matches:
            return
        ref = f'at "{path.path}"' if path else "at the manifest root level"
        if len(matches) == 1:
            possible_match = repr(matches[0])
            _warn(
                f'Possible typo: The key "{key}" {ref} should probably have been {possible_match}'
            )
        else:
            # Sort for a stable message
            matches.sort()
            possible_matches = ", ".join(repr(a) for a in matches)
            _warn(
                f'Possible typo: The key "{key}" {ref} should probably have been one of {possible_matches}'
            )
class AttributeTypeHandler:
    """Pairs a type check with an optional value mapper for one attribute type.

    The handler stores a human-readable description of the type, a callable
    that validates a raw value, an optional "base type" usable in an
    ``isinstance`` check and an optional mapper that converts the validated
    raw value into its final form.
    """

    __slots__ = ("_description", "_ensure_type", "base_type", "mapper")

    def __init__(
        self,
        description: str,
        ensure_type: Callable[[Any, AttributePath], None],
        *,
        base_type: Optional[Type[Any]] = None,
        mapper: Optional[
            Callable[[Any, AttributePath, Optional["ParserContextData"]], Any]
        ] = None,
    ) -> None:
        """Create a handler.

        :param description: Text returned by `describe_type`.
        :param ensure_type: Callable invoked by `ensure_type` with the value
          and its attribute path.
        :param base_type: Type used by `base_type_match` (if any).
        :param mapper: Callable used by `map_type` (if any).
        """
        self._description = description
        self._ensure_type = ensure_type
        self.base_type = base_type
        self.mapper = mapper

    def describe_type(self) -> str:
        """Return the description given at construction time."""
        return self._description

    def ensure_type(self, obj: object, path: AttributePath) -> None:
        """Run the validation callable on ``obj`` at ``path``."""
        self._ensure_type(obj, path)

    def base_type_match(self, obj: object) -> bool:
        """Return True if a base type is set and ``obj`` is an instance of it."""
        expected = self.base_type
        if expected is None:
            return False
        return isinstance(obj, expected)

    def map_type(
        self,
        value: Any,
        path: AttributePath,
        parser_context: Optional["ParserContextData"],
    ) -> Any:
        """Apply the mapper to ``value``; return ``value`` as-is without a mapper."""
        if self.mapper is None:
            return value
        return self.mapper(value, path, parser_context)

    def combine_mapper(
        self,
        mapper: Optional[
            Callable[[Any, AttributePath, Optional["ParserContextData"]], Any]
        ],
    ) -> "AttributeTypeHandler":
        """Return a handler whose mapper runs the current mapper, then ``mapper``.

        :param mapper: The mapper to apply last. When None, this handler is
          returned unchanged (same instance).
        :return: A new handler with the same description, validator and base
          type, but with the chained mapper.
        """
        if mapper is None:
            return self

        first = self.mapper
        if first is None:
            chained = mapper
        else:

            def chained(
                value: Any,
                path: AttributePath,
                parser_context: Optional["ParserContextData"],
            ) -> Any:
                intermediate = first(value, path, parser_context)
                return mapper(intermediate, path, parser_context)

        return AttributeTypeHandler(
            self._description,
            self._ensure_type,
            base_type=self.base_type,
            mapper=chained,
        )
def _extract_path_hint(v: Any, attribute_path: AttributePath) -> bool:
    """Record a path hint on ``attribute_path`` if it does not have one yet.

    The hint is ``v`` itself when it is a string, or the first element of
    ``v`` when it is a non-empty list starting with a string.

    :param v: The raw value to derive the hint from.
    :param attribute_path: The path object whose ``path_hint`` is set.
    :return: True if ``attribute_path`` has a hint after the call (either it
      already had one or one was derived from ``v``), otherwise False.
    """
    if attribute_path.path_hint is not None:
        # Never overwrite an existing hint.
        return True

    if isinstance(v, str):
        hint = v
    elif isinstance(v, list) and len(v) > 0 and isinstance(v[0], str):
        hint = v[0]
    else:
        return False

    attribute_path.path_hint = hint
    return True
@dataclasses.dataclass(slots=True)
class DeclarativeMappingInputParser(DeclarativeInputParser[TD], Generic[TD, SF]):
    """Parser for a manifest snippet written as a mapping.

    The parser validates the keys of the mapping (unknown, missing,
    conditionally required, "at least one of" and mutually exclusive keys),
    validates and maps each value via its `AttributeDescription` and returns
    the result keyed by target attribute name. If `alt_form_parser` is set,
    a non-mapping value is accepted as well and stored under the alternative
    form's target attribute.
    """

    # Manifest keys that must always be present in the mapping.
    input_time_required_parameters: FrozenSet[str]
    # Every manifest key accepted in the mapping.
    all_parameters: FrozenSet[str]
    # Attribute descriptions keyed by the name used in the manifest.
    manifest_attributes: Mapping[str, "AttributeDescription"]
    # Attribute descriptions keyed by the name used in the source type.
    source_attributes: Mapping[str, "AttributeDescription"]
    # Groups of manifest keys of which at least one must be present.
    at_least_one_of: FrozenSet[FrozenSet[str]]
    # Handler for the non-mapping form (None if only mappings are accepted).
    alt_form_parser: Optional[AttributeDescription]
    # Groups of manifest keys of which at most one may be present.
    mutually_exclusive_attributes: FrozenSet[FrozenSet[str]] = frozenset()
    # Lazily computed by `_per_attribute_conflicts`.
    _per_attribute_conflicts_cache: Optional[Mapping[str, FrozenSet[str]]] = None
    inline_reference_documentation: Optional[ParserDocumentation] = None
    # Manifest keys probed (in order) for a path hint used in error reporting.
    path_hint_source_attributes: Sequence[str] = tuple()

    def parse_input(
        self,
        value: object,
        path: AttributePath,
        *,
        parser_context: Optional["ParserContextData"] = None,
    ) -> TD:
        """Validate ``value`` and convert it into the target mapping.

        :param value: The raw manifest value (a mapping or, if supported, the
          alternative non-mapping form).
        :param path: The attribute path of ``value``; used for error messages.
          Its ``alias_mapping`` (and possibly ``path_hint``) is updated.
        :param parser_context: Context passed on to value mappers and
          conditional requirement checks.
        :return: The parsed content keyed by target attribute name.
        :raises ManifestParseException: If the input is not valid.
        """
        if self.reference_documentation_url is not None:
            doc_ref = f" (Documentation: {self.reference_documentation_url})"
        else:
            doc_ref = ""
        if value is None:
            form_note = " The attribute must be a mapping."
            if self.alt_form_parser is not None:
                form_note = (
                    " The attribute can be a mapping or a non-mapping format"
                    ' (usually, "non-mapping format" means a string or a list of strings).'
                )
            if self.reference_documentation_url is not None:
                doc_ref = f" Please see {self.reference_documentation_url} for the documentation."
            raise ManifestParseException(
                f"The attribute {path.path} was missing a value. {form_note}{doc_ref}"
            )
        if not isinstance(value, dict):
            # Non-mapping input is only valid via the alternative form.
            alt_form_parser = self.alt_form_parser
            if alt_form_parser is None:
                raise ManifestParseException(
                    f"The attribute {path.path} must be a mapping.{doc_ref}"
                )
            _extract_path_hint(value, path)
            alt_form_parser.type_validator.ensure_type(value, path)
            assert (
                value is not None
            ), "The alternative form was None, but the parser should have rejected None earlier."
            attribute = alt_form_parser.target_attribute
            alias_mapping = {
                attribute: ("", None),
            }
            v = alt_form_parser.type_validator.map_type(value, path, parser_context)
            path.alias_mapping = alias_mapping
            return cast("TD", {attribute: v})

        unknown_keys = value.keys() - self.all_parameters
        if unknown_keys:
            # Keys are not guaranteed to be strings, so sort on their string
            # form to get a deterministic order for warnings and the error.
            ordered_unknown = sorted(unknown_keys, key=str)
            for k in ordered_unknown:
                if isinstance(k, str):
                    _detect_possible_typo(k, value[k], self.manifest_attributes, path)
            unknown = ", ".join(repr(k) for k in ordered_unknown)
            unused_keys = self.all_parameters - value.keys()
            if unused_keys:
                usable = ", ".join(sorted(unused_keys))
                raise ManifestParseException(
                    f"Unknown keys {unknown} at {path.path}. Keys that could be used here are:"
                    f" {usable}.{doc_ref}"
                )
            raise ManifestParseException(
                f"Unknown keys {unknown} at {path.path}. Please remove them.{doc_ref}"
            )
        missing_keys = self.input_time_required_parameters - value.keys()
        if missing_keys:
            required = ", ".join(repr(k) for k in sorted(missing_keys))
            raise ManifestParseException(
                f"The following keys were required but not present at {path.path}: {required}{doc_ref}"
            )
        for maybe_required in self.all_parameters - value.keys():
            attr = self.manifest_attributes[maybe_required]
            assert attr.conditional_required is None or parser_context is not None
            if (
                attr.conditional_required is not None
                and attr.conditional_required.condition_applies(
                    assume_not_none(parser_context)
                )
            ):
                reason = attr.conditional_required.reason
                raise ManifestParseException(
                    f'Missing the *conditionally* required attribute "{maybe_required}" at {path.path}. {reason}{doc_ref}'
                )
        for keyset in self.at_least_one_of:
            matched_keys = value.keys() & keyset
            if not matched_keys:
                conditionally_required = ", ".join(repr(k) for k in sorted(keyset))
                raise ManifestParseException(
                    f"At least one of the following keys must be present at {path.path}:"
                    f" {conditionally_required}{doc_ref}"
                )
        for group in self.mutually_exclusive_attributes:
            matched = value.keys() & group
            if len(matched) > 1:
                ck = ", ".join(repr(c) for c in sorted(matched))
                raise ManifestParseException(
                    f"Could not parse {path.path}: The following attributes are"
                    f" mutually exclusive: {ck}{doc_ref}"
                )
        result = {}
        per_attribute_conflicts = self._per_attribute_conflicts()
        alias_mapping = {}
        # The first candidate that yields a hint wins.
        for hint_attribute in self.path_hint_source_attributes:
            v = value.get(hint_attribute)
            if v is not None and _extract_path_hint(v, path):
                break
        for k, v in value.items():
            attr = self.manifest_attributes[k]
            matched = value.keys() & per_attribute_conflicts[k]
            if matched:
                ck = ", ".join(repr(c) for c in sorted(matched))
                raise ManifestParseException(
                    f'The attribute "{k}" at {path.path} cannot be used with the following'
                    f" attributes: {ck}{doc_ref}"
                )
            nk = attr.target_attribute
            key_path = path[k]
            attr.type_validator.ensure_type(v, key_path)
            if v is None:
                # Accepted by the validator, but omitted from the result.
                continue
            if k != nk:
                alias_mapping[nk] = k, None
            v = attr.type_validator.map_type(v, key_path, parser_context)
            result[nk] = v
        if alias_mapping:
            path.alias_mapping = alias_mapping
        return cast("TD", result)

    def _per_attribute_conflicts(self) -> Mapping[str, FrozenSet[str]]:
        """Return (and cache) the conflicting manifest keys for each manifest key."""
        conflicts = self._per_attribute_conflicts_cache
        if conflicts is not None:
            return conflicts
        attrs = self.source_attributes
        conflicts = {
            a.source_attribute_name: frozenset(
                attrs[ca].source_attribute_name for ca in a.conflicting_attributes
            )
            for a in attrs.values()
        }
        self._per_attribute_conflicts_cache = conflicts
        return conflicts
class DebputyParseHint:
    """Factory for annotations (parse hints) that tune a generated parser.

    The classmethods return hint instances that are meant to be placed inside
    `Annotated[...]` on the attributes of the TypedDicts given to the parser
    generator.
    """

    @classmethod
    def target_attribute(cls, target_attribute: str) -> "DebputyParseHint":
        """Define this source attribute to have a different target attribute name

        As an example:

        >>> class SourceType(TypedDict):
        ...     source: Annotated[NotRequired[str], DebputyParseHint.target_attribute("sources")]
        ...     sources: NotRequired[List[str]]
        >>> class TargetType(TypedDict):
        ...     sources: List[str]
        >>> pg = ParserGenerator()
        >>> parser = pg.parser_from_typed_dict(TargetType, source_content=SourceType)

        In this example, the user can provide either `source` or `sources` and the parser will
        map them to the `sources` attribute in the `TargetType`. Note this example relies on
        the builtin mapping of `str` to `List[str]` to align the types between `source` (from
        SourceType) and `sources` (from TargetType).

        The following rules apply:

         * All source attributes that map to the same target attribute will be mutually exclusive
           (that is, the user cannot give `source` *and* `sources` as input).
         * When the target attribute is required, the source attributes are conditionally
           mandatory requiring the user to provide exactly one of them.
         * When multiple source attributes point to a single target attribute, none of the source
           attributes can be Required.
         * The annotation can only be used for the source type specification and the source type
           specification must be different from the target type specification.

        The `target_attribute` annotation can be used without having multiple source attributes. This
        can be useful if the source attribute name is not valid as a python variable identifier to
        rename it to a valid python identifier.

        :param target_attribute: The attribute name in the target content
        :return: The annotation.
        """
        return TargetAttribute(target_attribute)

    @classmethod
    def conflicts_with_source_attributes(
        cls,
        *conflicting_source_attributes: str,
    ) -> "DebputyParseHint":
        """Declare a conflict with one or more source attributes

        Example:

        >>> class SourceType(TypedDict):
        ...     source: Annotated[NotRequired[str], DebputyParseHint.target_attribute("sources")]
        ...     sources: NotRequired[List[str]]
        ...     into_dir: NotRequired[str]
        ...     renamed_to: Annotated[
        ...         NotRequired[str],
        ...         DebputyParseHint.conflicts_with_source_attributes("sources", "into_dir")
        ...     ]
        >>> class TargetType(TypedDict):
        ...     sources: List[str]
        ...     into_dir: NotRequired[str]
        ...     renamed_to: NotRequired[str]
        >>> pg = ParserGenerator()
        >>> parser = pg.parser_from_typed_dict(TargetType, source_content=SourceType)

        In this example, if the user was to provide `renamed_to` with `sources` or `into_dir` the parser would report
        an error. However, the parser will allow `renamed_to` with `source` as the conflict is considered only for
        the input source. That is, it is irrelevant that `sources` and `source` happen to "map" to the same target
        attribute.

        The following rules apply:
          * It is not possible for a target attribute to declare conflicts unless the target type spec is reused as
            source type spec.
          * All attributes involved in a conflict must be NotRequired. If any of the attributes are Required, then
            the parser generator will reject the input.
          * All attributes listed in the conflict must be valid attributes in the source type spec.

        Note you do not have to specify conflicts between two attributes with the same target attribute name. The
        `target_attribute` annotation will handle that for you.

        :param conflicting_source_attributes: All source attributes that cannot be used with this attribute.
        :return: The annotation.
        :raises ValueError: If no attributes are given.
        """
        if len(conflicting_source_attributes) < 1:
            raise ValueError(
                "DebputyParseHint.conflicts_with_source_attributes requires at least one attribute as input"
            )
        return ConflictWithSourceAttribute(frozenset(conflicting_source_attributes))

    @classmethod
    def required_when_single_binary(
        cls,
        *,
        package_type: PackageTypeSelector = _ALL_PACKAGE_TYPES,
    ) -> "DebputyParseHint":
        """Declare a source attribute as required when the source package produces exactly one binary package

        The attribute in question must always be declared as `NotRequired` in the TypedDict and this condition
        can only be used for source attributes.

        :param package_type: Restrict the condition to binary packages of the selected type(s). When the
          selector resolves to all package types, the condition considers every binary package.
        :return: The annotation.
        """
        resolved_package_types = resolve_package_type_selectors(package_type)
        reason = "The field is required for source packages producing exactly one binary package"
        if resolved_package_types != _ALL_PACKAGE_TYPES:
            types = ", ".join(sorted(resolved_package_types))
            reason += f" of type {types}"
            # Filter on the *resolved* selector. The raw `package_type` is not
            # necessarily a collection of types.
            # NOTE(review): if a bare string such as "udeb" is a valid selector,
            # `"deb" in "udeb"` would be a substring match - confirm against
            # `PackageTypeSelector`.
            return ConditionalRequired(
                reason,
                lambda c: sum(
                    1
                    for p in c.binary_packages.values()
                    if p.package_type in resolved_package_types
                )
                == 1,
            )
        return ConditionalRequired(
            reason,
            lambda c: c.is_single_binary_package,
        )

    @classmethod
    def required_when_multi_binary(
        cls,
        *,
        package_type: PackageTypeSelector = _ALL_PACKAGE_TYPES,
    ) -> "DebputyParseHint":
        """Declare a source attribute as required when the source package produces two or more binary packages

        The attribute in question must always be declared as `NotRequired` in the TypedDict and this condition
        can only be used for source attributes.

        Note that with a restricted `package_type`, the condition applies whenever the number of binary
        packages of the selected type(s) is anything but exactly one (which includes zero).

        :param package_type: Restrict the condition to binary packages of the selected type(s). When the
          selector resolves to all package types, the condition considers every binary package.
        :return: The annotation.
        """
        resolved_package_types = resolve_package_type_selectors(package_type)
        reason = "The field is required for source packages producing two or more binary packages"
        if resolved_package_types != _ALL_PACKAGE_TYPES:
            types = ", ".join(sorted(resolved_package_types))
            reason = (
                "The field is required for source packages not producing exactly one binary package"
                f" of type {types}"
            )
            # Filter on the *resolved* selector; see `required_when_single_binary`
            # for why the raw `package_type` must not be used here.
            return ConditionalRequired(
                reason,
                lambda c: sum(
                    1
                    for p in c.binary_packages.values()
                    if p.package_type in resolved_package_types
                )
                != 1,
            )
        return ConditionalRequired(
            reason,
            lambda c: not c.is_single_binary_package,
        )

    @classmethod
    def manifest_attribute(cls, attribute: str) -> "DebputyParseHint":
        """Declare what the attribute name (as written in the manifest) should be

        By default, debputy will do an attribute normalizing that will take valid python identifiers such
        as `dest_dir` and remap it to the manifest variant (such as `dest-dir`) automatically. If you have
        a special case, where this built-in normalization is insufficient or the python name is considerably
        different from what the user would write in the manifest, you can use this parse hint to set the
        name that the user would have to write in the manifest for this attribute.

        >>> class SourceType(TypedDict):
        ...     source: List[FileSystemMatchRule]
        ...     # Use "as" in the manifest because "as_" was not pretty enough
        ...     install_as: Annotated[NotRequired[FileSystemExactMatchRule], DebputyParseHint.manifest_attribute("as")]

        In this example, we use the parse hint to use "as" as the name in the manifest, because we cannot
        use "as" as a valid python identifier (it is a keyword). While debputy would map `as_` to `as` for us,
        we have chosen to use `install_as` as a python identifier.

        :param attribute: The attribute name as written in the manifest.
        :return: The annotation.
        """
        return ManifestAttribute(attribute)

    @classmethod
    def not_path_error_hint(cls) -> "DebputyParseHint":
        """Mark this attribute as not a "path hint" when it comes to reporting errors

        By default, `debputy` will pick up attributes that use path names (FileSystemMatchRule) as
        candidates for parse error hints (the little "<Search for: VALUE>" in error messages).

        Most rules only have one active path-based attribute and paths tend to be unique enough
        that it helps people spot the issue faster. However, in rare cases, you can have multiple
        attributes that fit the bill. In this case, this hint can be used to "hide" the suboptimal
        choice. As an example:

        >>> class SourceType(TypedDict):
        ...     source: List[FileSystemMatchRule]
        ...     install_as: Annotated[NotRequired[FileSystemExactMatchRule], DebputyParseHint.not_path_error_hint()]

        In this case, without the hint, `debputy` might pick up `install_as` as the attribute to
        use as hint for error reporting. However, here we have decided that we never want `install_as`
        leaving `source` as the only option.

        Generally, this type hint must be placed on the **source** format. Any source attribute matching
        the parsed format will be ignored.

        Mind the asymmetry: The annotation is placed in the **source** format while `debputy` looks at
        the type of the target attribute to determine if it counts as path.

        :return: The annotation.
        """
        return NOT_PATH_HINT
None, + ) -> DeclarativeInputParser[TD]: + """Derive a parser from a TypedDict + + Generates a parser for a segment of the manifest (think the `install-docs` snippet) from a TypedDict + or two that are used as a description. + + In its most simple use-case, the caller provides a TypedDict of the expected attributed along with + their types. As an example: + + >>> class InstallDocsRule(DebputyParsedContent): + ... sources: List[str] + ... into: List[str] + >>> pg = ParserGenerator() + >>> simple_parser = pg.parser_from_typed_dict(InstallDocsRule) + + This will create a parser that would be able to interpret something like: + + ```yaml + install-docs: + sources: ["docs/*"] + into: ["my-pkg"] + ``` + + While this is sufficient for programmers, it is a bit ridig for the packager writing the manifest. Therefore, + you can also provide a TypedDict descriping the input, enabling more flexibility: + + >>> class InstallDocsRule(DebputyParsedContent): + ... sources: List[str] + ... into: List[str] + >>> class InputDocsRuleInputFormat(TypedDict): + ... source: NotRequired[Annotated[str, DebputyParseHint.target_attribute("sources")]] + ... sources: NotRequired[List[str]] + ... into: Union[str, List[str]] + >>> pg = ParserGenerator() + >>> flexible_parser = pg.parser_from_typed_dict( + ... InstallDocsRule, + ... source_content=InputDocsRuleInputFormat, + ... ) + + In this case, the `sources` field can either come from a single `source` in the manifest (which must be a string) + or `sources` (which must be a list of strings). The parser also ensures that only one of `source` or `sources` + is used to ensure the input is not ambigious. For the `into` parameter, the parser will accept it being a str + or a list of strings. Regardless of how the input was provided, the parser will normalize the input such that + both `sources` and `into` in the result is a list of strings. 
As an example, this parser can accept + both the previous input but also the following input: + + ```yaml + install-docs: + source: "docs/*" + into: "my-pkg" + ``` + + The `source` and `into` attributes are then normalized to lists as if the user had written them as lists + with a single string in them. As noted above, the name of the `source` attribute will also be normalized + while parsing. + + In the cases where only one field is required by the user, it can sometimes make sense to allow a non-dict + as part of the input. Example: + + >>> class DiscardRule(DebputyParsedContent): + ... paths: List[str] + >>> class DiscardRuleInputDictFormat(TypedDict): + ... path: NotRequired[Annotated[str, DebputyParseHint.target_attribute("paths")]] + ... paths: NotRequired[List[str]] + >>> # This format relies on DiscardRule having exactly one Required attribute + >>> DiscardRuleInputWithAltFormat = Union[ + ... DiscardRuleInputDictFormat, + ... str, + ... List[str], + ... ] + >>> pg = ParserGenerator() + >>> flexible_parser = pg.parser_from_typed_dict( + ... DiscardRule, + ... source_content=DiscardRuleInputWithAltFormat, + ... ) + + + Supported types: + * `List` - must have a fixed type argument (such as `List[str]`) + * `str` + * `int` + * `BinaryPackage` - When provided (or required), the user must provide a package name listed + in the debian/control file. The code receives the BinaryPackage instance + matching that input. + * `FileSystemMode` - When provided (or required), the user must provide a file system mode in any + format that `debputy' provides (such as `0644` or `a=rw,go=rw`). + * `FileSystemOwner` - When provided (or required), the user must a file system owner that is + available statically on all Debian systems (must be in `base-passwd`). + The user has multiple options for how to specify it (either via name or id). 
+ * `FileSystemGroup` - When provided (or required), the user must a file system group that is + available statically on all Debian systems (must be in `base-passwd`). + The user has multiple options for how to specify it (either via name or id). + * `ManifestCondition` - When provided (or required), the user must specify a conditional rule to apply. + Usually, it is better to extend `DebputyParsedContentStandardConditional`, which + provides the `debputy' default `when` parameter for conditionals. + + Supported special type-like parameters: + + * `Required` / `NotRequired` to mark a field as `Required` or `NotRequired`. Must be provided at the + outermost level. Cannot vary between `parsed_content` and `source_content`. + * `Annotated`. Accepted at the outermost level (inside Required/NotRequired) but ignored at the moment. + * `Union`. Must be the outermost level (inside `Annotated` or/and `Required`/`NotRequired` if these are present). + Automapping (see below) is restricted to two members in the Union. + + Notable non-supported types: + * `Mapping` and all variants therefore (such as `dict`). In the future, nested `TypedDict`s may be allowed. + * `Optional` (or `Union[..., None]`): Use `NotRequired` for optional fields. + + Automatic mapping rules from `source_content` to `parsed_content`: + - `Union[T, List[T]]` can be narrowed automatically to `List[T]`. Transformation is basically: + `lambda value: value if isinstance(value, list) else [value]` + - `T` can be mapped automatically to `List[T]`, Transformation being: `lambda value: [value]` + + Additionally, types can be annotated (`Annotated[str, ...]`) with `DebputyParseHint`s. Check its classmethod + for concrete features that may be useful to you. + + :param parsed_content: A DebputyParsedContent / TypedDict describing the desired model of the input once parsed. 
+ (DebputyParsedContent is a TypedDict subclass that work around some inadequate type checkers) + :param source_content: Optionally, a TypedDict describing the input allowed by the user. This can be useful + to describe more variations than in `parsed_content` that the parser will normalize for you. If omitted, + the parsed_content is also considered the source_content (which affects what annotations are allowed in it). + Note you should never pass the parsed_content as source_content directly. + :param allow_optional: In rare cases, you want to support explicitly provided vs. optional. In this case, you + should set this to True. Though, in 99.9% of all cases, you want `NotRequired` rather than `Optional` (and + can keep this False). + :param inline_reference_documentation: Optionally, programmatic documentation + :return: An input parser capable of reading input matching the TypedDict(s) used as reference. + """ + if not is_typeddict(parsed_content): + raise ValueError( + f"Unsupported parsed_content descriptor: {parsed_content.__qualname__}." + ' Only "TypedDict"-based types supported.' 
+ ) + if source_content is parsed_content: + raise ValueError( + "Do not provide source_content if it is the same as parsed_content" + ) + + target_attributes = self._parse_types( + parsed_content, + allow_source_attribute_annotations=source_content is None, + forbid_optional=not allow_optional, + ) + required_target_parameters = frozenset(parsed_content.__required_keys__) + parsed_alt_form = None + non_mapping_source_only = False + + if source_content is not None: + default_target_attribute = None + if len(required_target_parameters) == 1: + default_target_attribute = next(iter(required_target_parameters)) + + source_typed_dict, alt_source_forms = _extract_typed_dict( + source_content, + default_target_attribute, + ) + if alt_source_forms: + parsed_alt_form = self._parse_alt_form( + alt_source_forms, + default_target_attribute, + ) + if source_typed_dict is not None: + source_content_attributes = self._parse_types( + source_typed_dict, + allow_target_attribute_annotation=True, + allow_source_attribute_annotations=True, + forbid_optional=not allow_optional, + ) + source_content_parameter = "source_content" + source_and_parsed_differs = True + else: + source_typed_dict = parsed_content + source_content_attributes = target_attributes + source_content_parameter = "parsed_content" + source_and_parsed_differs = True + non_mapping_source_only = True + else: + source_typed_dict = parsed_content + source_content_attributes = target_attributes + source_content_parameter = "parsed_content" + source_and_parsed_differs = False + + sources = collections.defaultdict(set) + seen_targets = set() + seen_source_names: Dict[str, str] = {} + source_attributes: Dict[str, AttributeDescription] = {} + path_hint_source_attributes = [] + + for k in source_content_attributes: + ia = source_content_attributes[k] + + ta = ( + target_attributes.get(ia.target_attribute) + if source_and_parsed_differs + else ia + ) + if ta is None: + # Error message would be wrong if this assertion is false. 
+ assert source_and_parsed_differs + raise ValueError( + f'The attribute "{k}" from the "source_content" parameter should have mapped' + f' to "{ia.target_attribute}", but that parameter does not exist in "parsed_content"' + ) + if _is_path_attribute_candidate(ia, ta): + path_hint_source_attributes.append(ia.source_attribute_name) + existing_source_name = seen_source_names.get(ia.source_attribute_name) + if existing_source_name: + raise ValueError( + f'The attribute "{k}" and "{existing_source_name}" both share the source name' + f' "{ia.source_attribute_name}". Please change the {source_content_parameter} parameter,' + f' so only one attribute use "{ia.source_attribute_name}".' + ) + seen_source_names[ia.source_attribute_name] = k + seen_targets.add(ta.target_attribute) + sources[ia.target_attribute].add(k) + if source_and_parsed_differs: + bridge_mapper = self._type_normalize( + k, ia.attribute_type, ta.attribute_type, False + ) + ia.type_validator = ia.type_validator.combine_mapper(bridge_mapper) + source_attributes[k] = ia + + def _as_attr_names(td_name: Iterable[str]) -> FrozenSet[str]: + return frozenset( + source_content_attributes[a].source_attribute_name for a in td_name + ) + + _check_attributes( + parsed_content, + source_typed_dict, + source_content_attributes, + sources, + ) + + at_least_one_of = frozenset( + _as_attr_names(g) + for k, g in sources.items() + if len(g) > 1 and k in required_target_parameters + ) + + if source_and_parsed_differs and seen_targets != target_attributes.keys(): + missing = ", ".join( + repr(k) for k in (target_attributes.keys() - seen_targets) + ) + raise ValueError( + 'The following attributes in "parsed_content" did not have a source field in "source_content":' + f" {missing}" + ) + all_mutually_exclusive_fields = frozenset( + _as_attr_names(g) for g in sources.values() if len(g) > 1 + ) + + all_parameters = ( + source_typed_dict.__required_keys__ | source_typed_dict.__optional_keys__ + ) + _check_conflicts( + 
source_content_attributes, + source_typed_dict.__required_keys__, + all_parameters, + ) + + manifest_attributes = { + a.source_attribute_name: a for a in source_content_attributes.values() + } + + if parsed_alt_form is not None: + target_attribute = parsed_alt_form.target_attribute + if ( + target_attribute not in required_target_parameters + and required_target_parameters + or len(required_target_parameters) > 1 + ): + raise NotImplementedError( + "When using alternative source formats (Union[TypedDict, ...]), then the" + " target must have at most one require parameter" + ) + bridge_mapper = self._type_normalize( + target_attribute, + parsed_alt_form.attribute_type, + target_attributes[target_attribute].attribute_type, + False, + ) + parsed_alt_form.type_validator = ( + parsed_alt_form.type_validator.combine_mapper(bridge_mapper) + ) + + _verify_inline_reference_documentation( + source_content_attributes, + inline_reference_documentation, + parsed_alt_form is not None, + ) + if non_mapping_source_only: + return DeclarativeNonMappingInputParser( + assume_not_none(parsed_alt_form), + inline_reference_documentation=inline_reference_documentation, + ) + else: + return DeclarativeMappingInputParser( + _as_attr_names(source_typed_dict.__required_keys__), + _as_attr_names(all_parameters), + manifest_attributes, + source_attributes, + mutually_exclusive_attributes=all_mutually_exclusive_fields, + alt_form_parser=parsed_alt_form, + at_least_one_of=at_least_one_of, + inline_reference_documentation=inline_reference_documentation, + path_hint_source_attributes=tuple(path_hint_source_attributes), + ) + + def _as_type_validator( + self, + attribute: str, + provided_type: Any, + parsing_typed_dict_attribute: bool, + ) -> AttributeTypeHandler: + assert not isinstance(provided_type, tuple) + + if isinstance(provided_type, type) and issubclass( + provided_type, DebputyDispatchableType + ): + return _dispatch_parser(provided_type) + + unmapped_type = self._strip_mapped_types( + 
provided_type, + parsing_typed_dict_attribute, + ) + type_normalizer = self._type_normalize( + attribute, + unmapped_type, + provided_type, + parsing_typed_dict_attribute, + ) + t_unmapped, t_orig, t_args = unpack_type( + unmapped_type, + parsing_typed_dict_attribute, + ) + + if ( + t_orig == Union + and t_args + and len(t_args) == 2 + and any(v is _NONE_TYPE for v in t_args) + ): + _, _, args = unpack_type(provided_type, parsing_typed_dict_attribute) + actual_type = [a for a in args if a is not _NONE_TYPE][0] + validator = self._as_type_validator( + attribute, actual_type, parsing_typed_dict_attribute + ) + + def _validator(v: Any, path: AttributePath) -> None: + if v is None: + return + validator.ensure_type(v, path) + + return AttributeTypeHandler( + validator.describe_type(), + _validator, + base_type=validator.base_type, + mapper=type_normalizer, + ) + + if unmapped_type in BASIC_SIMPLE_TYPES: + type_name = BASIC_SIMPLE_TYPES[unmapped_type] + + type_mapping = self._registered_types.get(provided_type) + if type_mapping is not None: + simple_type = f" ({type_name})" + type_name = type_mapping.target_type.__name__ + else: + simple_type = "" + + def _validator(v: Any, path: AttributePath) -> None: + if not isinstance(v, unmapped_type): + _validation_type_error( + path, f"The attribute must be a {type_name}{simple_type}" + ) + + return AttributeTypeHandler( + type_name, + _validator, + base_type=unmapped_type, + mapper=type_normalizer, + ) + if t_orig == list: + if not t_args: + raise ValueError( + f'The attribute "{attribute}" is List but does not have Generics (Must use List[X])' + ) + _, t_provided_orig, t_provided_args = unpack_type( + provided_type, + parsing_typed_dict_attribute, + ) + genetic_type = t_args[0] + key_mapper = self._as_type_validator( + attribute, + genetic_type, + parsing_typed_dict_attribute, + ) + + def _validator(v: Any, path: AttributePath) -> None: + if not isinstance(v, list): + _validation_type_error(path, "The attribute must be a 
list") + for i, v in enumerate(v): + key_mapper.ensure_type(v, path[i]) + + list_mapper = ( + map_each_element(key_mapper.mapper) + if key_mapper.mapper is not None + else None + ) + + return AttributeTypeHandler( + f"List of {key_mapper.describe_type()}", + _validator, + base_type=list, + mapper=type_normalizer, + ).combine_mapper(list_mapper) + if is_typeddict(provided_type): + subparser = self.parser_from_typed_dict(cast("Type[TD]", provided_type)) + return AttributeTypeHandler( + description=f"{provided_type.__name__} (Typed Mapping)", + ensure_type=lambda v, ap: None, + base_type=dict, + mapper=lambda v, ap, cv: subparser.parse_input( + v, ap, parser_context=cv + ), + ) + if t_orig == dict: + if not t_args or len(t_args) != 2: + raise ValueError( + f'The attribute "{attribute}" is Dict but does not have Generics (Must use Dict[str, Y])' + ) + if t_args[0] != str: + raise ValueError( + f'The attribute "{attribute}" is Dict and has a non-str type as key.' + " Currently, only `str` is supported (Dict[str, Y])" + ) + key_mapper = self._as_type_validator( + attribute, + t_args[0], + parsing_typed_dict_attribute, + ) + value_mapper = self._as_type_validator( + attribute, + t_args[1], + parsing_typed_dict_attribute, + ) + + if key_mapper.base_type is None: + raise ValueError( + f'The attribute "{attribute}" is Dict and the key did not have a trivial base type. Key types' + f" without trivial base types (such as `str`) are not supported at the moment." + ) + + if value_mapper.mapper is not None: + raise ValueError( + f'The attribute "{attribute}" is Dict and the value requires mapping.' + " Currently, this is not supported. Consider a simpler type (such as Dict[str, str] or Dict[str, Any])." 
+ " Better typing may come later" + ) + + def _validator(uv: Any, path: AttributePath) -> None: + if not isinstance(uv, dict): + _validation_type_error(path, "The attribute must be a mapping") + key_name = "the first key in the mapping" + for i, (k, v) in enumerate(uv.items()): + if not key_mapper.base_type_match(k): + kp = path.copy_with_path_hint(key_name) + _validation_type_error( + kp, + f'The key number {i + 1} in attribute "{kp}" must be a {key_mapper.describe_type()}', + ) + key_name = f"the key after {k}" + value_mapper.ensure_type(v, path[k]) + + return AttributeTypeHandler( + f"Mapping of {value_mapper.describe_type()}", + _validator, + base_type=dict, + mapper=type_normalizer, + ).combine_mapper(key_mapper.mapper) + if t_orig == Union: + if _is_two_arg_x_list_x(t_args): + # Force the order to be "X, List[X]" as it simplifies the code + x_list_x = ( + t_args if get_origin(t_args[1]) == list else (t_args[1], t_args[0]) + ) + + # X, List[X] could match if X was List[Y]. However, our code below assumes + # that X is a non-list. The `_is_two_arg_x_list_x` returns False for this + # case to avoid this assert and fall into the "generic case". 
+ assert get_origin(x_list_x[0]) != list + x_subtype_checker = self._as_type_validator( + attribute, + x_list_x[0], + parsing_typed_dict_attribute, + ) + list_x_subtype_checker = self._as_type_validator( + attribute, + x_list_x[1], + parsing_typed_dict_attribute, + ) + type_description = x_subtype_checker.describe_type() + type_description = f"{type_description} or a list of {type_description}" + + def _validator(v: Any, path: AttributePath) -> None: + if isinstance(v, list): + list_x_subtype_checker.ensure_type(v, path) + else: + x_subtype_checker.ensure_type(v, path) + + return AttributeTypeHandler( + type_description, + _validator, + mapper=type_normalizer, + ) + else: + subtype_checker = [ + self._as_type_validator(attribute, a, parsing_typed_dict_attribute) + for a in t_args + ] + type_description = "one-of: " + ", ".join( + f"{sc.describe_type()}" for sc in subtype_checker + ) + mapper = subtype_checker[0].mapper + if any(mapper != sc.mapper for sc in subtype_checker): + raise ValueError( + f'Cannot handle the union "{provided_type}" as the target types need different' + " type normalization/mapping logic. Unions are generally limited to Union[X, List[X]]" + " where X is a non-collection type." 
+ ) + + def _validator(v: Any, path: AttributePath) -> None: + partial_matches = [] + for sc in subtype_checker: + try: + sc.ensure_type(v, path) + return + except ManifestParseException as e: + if sc.base_type_match(v): + partial_matches.append((sc, e)) + + if len(partial_matches) == 1: + raise partial_matches[0][1] + _validation_type_error( + path, f"Could not match against: {type_description}" + ) + + return AttributeTypeHandler( + type_description, + _validator, + mapper=type_normalizer, + ) + if t_orig == Literal: + # We want "x" for string values; repr provides 'x' + pretty = ", ".join( + f'"{v}"' if isinstance(v, str) else str(v) for v in t_args + ) + + def _validator(v: Any, path: AttributePath) -> None: + if v not in t_args: + value_hint = "" + if isinstance(v, str): + value_hint = f"({v}) " + _validation_type_error( + path, + f"Value {value_hint}must be one of the following literal values: {pretty}", + ) + + return AttributeTypeHandler( + f"One of the following literal values: {pretty}", + _validator, + ) + + if provided_type == Any: + return AttributeTypeHandler( + "any (unvalidated)", + lambda *a: None, + ) + raise ValueError( + f'The attribute "{attribute}" had/contained a type {provided_type}, which is not supported' + ) + + def _parse_types( + self, + spec: Type[TypedDict], + allow_target_attribute_annotation: bool = False, + allow_source_attribute_annotations: bool = False, + forbid_optional: bool = True, + ) -> Dict[str, AttributeDescription]: + annotations = get_type_hints(spec, include_extras=True) + return { + k: self._attribute_description( + k, + t, + k in spec.__required_keys__, + allow_target_attribute_annotation=allow_target_attribute_annotation, + allow_source_attribute_annotations=allow_source_attribute_annotations, + forbid_optional=forbid_optional, + ) + for k, t in annotations.items() + } + + def _attribute_description( + self, + attribute: str, + orig_td: Any, + is_required: bool, + forbid_optional: bool = True, + 
allow_target_attribute_annotation: bool = False, + allow_source_attribute_annotations: bool = False, + ) -> AttributeDescription: + td, anno, is_optional = _parse_type( + attribute, orig_td, forbid_optional=forbid_optional + ) + type_validator = self._as_type_validator(attribute, td, True) + parsed_annotations = DetectedDebputyParseHint.parse_annotations( + anno, + f' Seen with attribute "{attribute}".', + attribute, + is_required, + allow_target_attribute_annotation=allow_target_attribute_annotation, + allow_source_attribute_annotations=allow_source_attribute_annotations, + ) + return AttributeDescription( + target_attribute=parsed_annotations.target_attribute, + attribute_type=td, + type_validator=type_validator, + annotations=anno, + is_optional=is_optional, + conflicting_attributes=parsed_annotations.conflict_with_source_attributes, + conditional_required=parsed_annotations.conditional_required, + source_attribute_name=assume_not_none( + parsed_annotations.source_manifest_attribute + ), + parse_hints=parsed_annotations, + ) + + def _parse_alt_form( + self, + alt_form, + default_target_attribute: Optional[str], + ) -> AttributeDescription: + td, anno, is_optional = _parse_type( + "source_format alternative form", + alt_form, + forbid_optional=True, + parsing_typed_dict_attribute=False, + ) + type_validator = self._as_type_validator( + "source_format alternative form", + td, + True, + ) + parsed_annotations = DetectedDebputyParseHint.parse_annotations( + anno, + f" The alternative for source_format.", + None, + False, + default_target_attribute=default_target_attribute, + allow_target_attribute_annotation=True, + allow_source_attribute_annotations=False, + ) + return AttributeDescription( + target_attribute=parsed_annotations.target_attribute, + attribute_type=td, + type_validator=type_validator, + annotations=anno, + is_optional=is_optional, + conflicting_attributes=parsed_annotations.conflict_with_source_attributes, + 
conditional_required=parsed_annotations.conditional_required, + source_attribute_name="Alt form of the source_format", + ) + + def _union_narrowing( + self, + input_type: Any, + target_type: Any, + parsing_typed_dict_attribute: bool, + ) -> Optional[Callable[[Any, AttributePath, Optional["ParserContextData"]], Any]]: + _, input_orig, input_args = unpack_type( + input_type, parsing_typed_dict_attribute + ) + _, target_orig, target_args = unpack_type( + target_type, parsing_typed_dict_attribute + ) + + if input_orig != Union or not input_args: + raise ValueError("input_type must be a Union[...] with non-empty args") + + # Currently, we only support Union[X, List[X]] -> List[Y] narrowing or Union[X, List[X]] -> Union[Y, Union[Y]] + # - Where X = Y or there is a simple standard transformation from X to Y. + + if target_orig not in (Union, list) or not target_args: + # Not supported + return None + + if target_orig == Union and set(input_args) == set(target_args): + # Not needed (identity mapping) + return None + + if target_orig == list and not any(get_origin(a) == list for a in input_args): + # Not supported + return None + + target_arg = target_args[0] + simplified_type = self._strip_mapped_types( + target_arg, parsing_typed_dict_attribute + ) + acceptable_types = { + target_arg, + List[target_arg], # type: ignore + simplified_type, + List[simplified_type], # type: ignore + } + target_format = ( + target_arg, + List[target_arg], # type: ignore + ) + in_target_format = 0 + in_simple_format = 0 + for input_arg in input_args: + if input_arg not in acceptable_types: + # Not supported + return None + if input_arg in target_format: + in_target_format += 1 + else: + in_simple_format += 1 + + assert in_simple_format or in_target_format + + if in_target_format and not in_simple_format: + # Union[X, List[X]] -> List[X] + return normalize_into_list + mapped = self._registered_types[target_arg] + if not in_target_format and in_simple_format: + # Union[X, List[X]] -> List[Y] + + 
def _mapper_x_list_y( + x: Union[Any, List[Any]], + ap: AttributePath, + pc: Optional["ParserContextData"], + ) -> List[Any]: + in_list_form: List[Any] = normalize_into_list(x, ap, pc) + + return [mapped.mapper(x, ap, pc) for x in in_list_form] + + return _mapper_x_list_y + + # Union[Y, List[X]] -> List[Y] + if not isinstance(target_arg, type): + raise ValueError( + f"Cannot narrow {input_type} -> {target_type}: The automatic conversion does" + f" not support mixed types. Please use either {simplified_type} or {target_arg}" + f" in the source content (but both a mix of both)" + ) + + def _mapper_mixed_list_y( + x: Union[Any, List[Any]], + ap: AttributePath, + pc: Optional["ParserContextData"], + ) -> List[Any]: + in_list_form: List[Any] = normalize_into_list(x, ap, pc) + + return [ + x if isinstance(x, target_arg) else mapped.mapper(x, ap, pc) + for x in in_list_form + ] + + return _mapper_mixed_list_y + + def _type_normalize( + self, + attribute: str, + input_type: Any, + target_type: Any, + parsing_typed_dict_attribute: bool, + ) -> Optional[Callable[[Any, AttributePath, Optional["ParserContextData"]], Any]]: + if input_type == target_type: + return None + _, input_orig, input_args = unpack_type( + input_type, parsing_typed_dict_attribute + ) + _, target_orig, target_args = unpack_type( + target_type, + parsing_typed_dict_attribute, + ) + if input_orig == Union: + result = self._union_narrowing( + input_type, target_type, parsing_typed_dict_attribute + ) + if result: + return result + elif target_orig == list and target_args[0] == input_type: + return wrap_into_list + + mapped = self._registered_types.get(target_type) + if mapped is not None and input_type == mapped.source_type: + # Source -> Target + return mapped.mapper + if target_orig == list and target_args: + mapped = self._registered_types.get(target_args[0]) + if mapped is not None: + # mypy is dense and forgots `mapped` cannot be optional in the comprehensions. 
+ mapped_type: TypeMapping = mapped + if input_type == mapped.source_type: + # Source -> List[Target] + return lambda x, ap, pc: [mapped_type.mapper(x, ap, pc)] + if ( + input_orig == list + and input_args + and input_args[0] == mapped_type.source_type + ): + # List[Source] -> List[Target] + return lambda xs, ap, pc: [ + mapped_type.mapper(x, ap, pc) for x in xs + ] + + raise ValueError( + f'Unsupported type normalization for "{attribute}": Cannot automatically map/narrow' + f" {input_type} to {target_type}" + ) + + def _strip_mapped_types( + self, orig_td: Any, parsing_typed_dict_attribute: bool + ) -> Any: + m = self._registered_types.get(orig_td) + if m is not None: + return m.source_type + _, v, args = unpack_type(orig_td, parsing_typed_dict_attribute) + if v == list: + arg = args[0] + m = self._registered_types.get(arg) + if m: + return List[m.source_type] # type: ignore + if v == Union: + stripped_args = tuple( + self._strip_mapped_types(x, parsing_typed_dict_attribute) for x in args + ) + if stripped_args != args: + return Union[stripped_args] + return orig_td + + +def _verify_inline_reference_documentation( + source_content_attributes: Mapping[str, AttributeDescription], + inline_reference_documentation: Optional[ParserDocumentation], + has_alt_form: bool, +) -> None: + if inline_reference_documentation is None: + return + attribute_doc = inline_reference_documentation.attribute_doc + if attribute_doc: + seen = set() + for attr_doc in attribute_doc: + for attr_name in attr_doc.attributes: + attr = source_content_attributes.get(attr_name) + if attr is None: + raise ValueError( + f'The inline_reference_documentation references an attribute "{attr_name}", which does not' + f" exist in the source format." + ) + if attr_name in seen: + raise ValueError( + f'The inline_reference_documentation has documentation for "{attr_name}" twice,' + f" which is not supported. 
Please document it at most once" + ) + seen.add(attr_name) + + undocumented = source_content_attributes.keys() - seen + if undocumented: + undocumented_attrs = ", ".join(undocumented) + raise ValueError( + "The following attributes were not documented. If this is deliberate, then please" + ' declare each them as undocumented (via undocumented_attr("foo")):' + f" {undocumented_attrs}" + ) + + if inline_reference_documentation.alt_parser_description and not has_alt_form: + raise ValueError( + "The inline_reference_documentation had documentation for an non-mapping format," + " but the source format does not have a non-mapping format." + ) + + +def _check_conflicts( + input_content_attributes: Dict[str, AttributeDescription], + required_attributes: FrozenSet[str], + all_attributes: FrozenSet[str], +) -> None: + for attr_name, attr in input_content_attributes.items(): + if attr_name in required_attributes and attr.conflicting_attributes: + c = ", ".join(repr(a) for a in attr.conflicting_attributes) + raise ValueError( + f'The attribute "{attr_name}" is required and conflicts with the attributes: {c}.' + " This makes it impossible to use these attributes. Either remove the attributes" + f' (along with the conflicts for them), adjust the conflicts or make "{attr_name}"' + " optional (NotRequired)" + ) + else: + required_conflicts = attr.conflicting_attributes & required_attributes + if required_conflicts: + c = ", ".join(repr(a) for a in required_conflicts) + raise ValueError( + f'The attribute "{attr_name}" conflicts with the following *required* attributes: {c}.' + f' This makes it impossible to use the "{attr_name}" attribute. 
Either remove it,' + f" adjust the conflicts or make the listed attributes optional (NotRequired)" + ) + unknown_attributes = attr.conflicting_attributes - all_attributes + if unknown_attributes: + c = ", ".join(repr(a) for a in unknown_attributes) + raise ValueError( + f'The attribute "{attr_name}" declares a conflict with the following unknown attributes: {c}.' + f" None of these attributes were declared in the input." + ) + + +def _check_attributes( + content: Type[TypedDict], + input_content: Type[TypedDict], + input_content_attributes: Dict[str, AttributeDescription], + sources: Mapping[str, Collection[str]], +) -> None: + target_required_keys = content.__required_keys__ + input_required_keys = input_content.__required_keys__ + all_input_keys = input_required_keys | input_content.__optional_keys__ + + for input_name in all_input_keys: + attr = input_content_attributes[input_name] + target_name = attr.target_attribute + source_names = sources[target_name] + input_is_required = input_name in input_required_keys + target_is_required = target_name in target_required_keys + + assert source_names + + if input_is_required and len(source_names) > 1: + raise ValueError( + f'The source attribute "{input_name}" is required, but it maps to "{target_name}",' + f' which has multiple sources "{source_names}". If "{input_name}" should be required,' + f' then there is no need for additional sources for "{target_name}". Alternatively,' + f' "{input_name}" might be missing a NotRequired type' + f' (example: "{input_name}: NotRequired[<OriginalTypeHere>]")' + ) + if not input_is_required and target_is_required and len(source_names) == 1: + raise ValueError( + f'The source attribute "{input_name}" is not marked as required and maps to' + f' "{target_name}", which is marked as required. As there are no other attributes' + f' mapping to "{target_name}", then "{input_name}" must be required as well' + f' ("{input_name}: Required[<Type>]"). 
Alternatively, "{target_name}" should be optional' + f' ("{target_name}: NotRequired[<Type>]") or an "MappingHint.aliasOf" might be missing.' + ) + + +def _validation_type_error(path: AttributePath, message: str) -> None: + raise ManifestParseException( + f'The attribute "{path.path}" did not have a valid structure/type: {message}' + ) + + +def _is_two_arg_x_list_x(t_args: Tuple[Any, ...]) -> bool: + if len(t_args) != 2: + return False + lhs, rhs = t_args + if get_origin(lhs) == list: + if get_origin(rhs) == list: + # It could still match X, List[X] - but we do not allow this case for now as the caller + # does not support it. + return False + l_args = get_args(lhs) + return bool(l_args and l_args[0] == rhs) + if get_origin(rhs) == list: + r_args = get_args(rhs) + return bool(r_args and r_args[0] == lhs) + return False + + +def _extract_typed_dict( + base_type, + default_target_attribute: Optional[str], +) -> Tuple[Optional[Type[TypedDict]], Any]: + if is_typeddict(base_type): + return base_type, None + _, origin, args = unpack_type(base_type, False) + if origin != Union: + if isinstance(base_type, type) and issubclass(base_type, (dict, Mapping)): + raise ValueError( + "The source_format cannot be nor contain a (non-TypedDict) dict" + ) + return None, base_type + typed_dicts = [x for x in args if is_typeddict(x)] + if len(typed_dicts) > 1: + raise ValueError( + "When source_format is a Union, it must contain at most one TypedDict" + ) + typed_dict = typed_dicts[0] if typed_dicts else None + + if any(x is None or x is _NONE_TYPE for x in args): + raise ValueError( + "The source_format cannot be nor contain Optional[X] or Union[X, None]" + ) + + if any( + isinstance(x, type) and issubclass(x, (dict, Mapping)) + for x in args + if x is not typed_dict + ): + raise ValueError( + "The source_format cannot be nor contain a (non-TypedDict) dict" + ) + remaining = [x for x in args if x is not typed_dict] + has_target_attribute = False + anno = None + if len(remaining) == 1: 
+ base_type, anno, _ = _parse_type( + "source_format alternative form", + remaining[0], + forbid_optional=True, + parsing_typed_dict_attribute=False, + ) + has_target_attribute = bool(anno) and any( + isinstance(x, TargetAttribute) for x in anno + ) + target_type = base_type + else: + target_type = Union[tuple(remaining)] + + if default_target_attribute is None and not has_target_attribute: + raise ValueError( + 'The alternative format must be Union[TypedDict,Annotated[X, DebputyParseHint.target_attribute("...")]]' + " OR the parsed_content format must have exactly one attribute that is required." + ) + if anno: + final_anno = [target_type] + final_anno.extend(anno) + return typed_dict, Annotated[tuple(final_anno)] + return typed_dict, target_type + + +def _dispatch_parse_generator( + dispatch_type: Type[DebputyDispatchableType], +) -> Callable[[Any, AttributePath, Optional["ParserContextData"]], Any]: + def _dispatch_parse( + value: Any, + attribute_path: AttributePath, + parser_context: Optional["ParserContextData"], + ): + assert parser_context is not None + dispatching_parser = parser_context.dispatch_parser_table_for(dispatch_type) + return dispatching_parser.parse( + value, attribute_path, parser_context=parser_context + ) + + return _dispatch_parse + + +def _dispatch_parser( + dispatch_type: Type[DebputyDispatchableType], +) -> AttributeTypeHandler: + return AttributeTypeHandler( + dispatch_type.__name__, + lambda *a: None, + mapper=_dispatch_parse_generator(dispatch_type), + ) + + +def _parse_type( + attribute: str, + orig_td: Any, + forbid_optional: bool = True, + parsing_typed_dict_attribute: bool = True, +) -> Tuple[Any, Tuple[Any, ...], bool]: + td, v, args = unpack_type(orig_td, parsing_typed_dict_attribute) + md: Tuple[Any, ...] 
= tuple() + optional = False + if v is not None: + if v == Annotated: + anno = get_args(td) + md = anno[1:] + td, v, args = unpack_type(anno[0], parsing_typed_dict_attribute) + + if td is _NONE_TYPE: + raise ValueError( + f'The attribute "{attribute}" resolved to type "None". "Nil" / "None" fields are not allowed in the' + " debputy manifest, so this attribute does not make sense in its current form." + ) + if forbid_optional and v == Union and any(a is _NONE_TYPE for a in args): + raise ValueError( + f'Detected use of Optional in "{attribute}", which is not allowed here.' + " Please use NotRequired for optional fields" + ) + + return td, md, optional + + +def _normalize_attribute_name(attribute: str) -> str: + if attribute.endswith("_"): + attribute = attribute[:-1] + return attribute.replace("_", "-") + + +@dataclasses.dataclass +class DetectedDebputyParseHint: + target_attribute: str + source_manifest_attribute: Optional[str] + conflict_with_source_attributes: FrozenSet[str] + conditional_required: Optional[ConditionalRequired] + applicable_as_path_hint: bool + + @classmethod + def parse_annotations( + cls, + anno: Tuple[Any, ...], + error_context: str, + default_attribute_name: Optional[str], + is_required: bool, + default_target_attribute: Optional[str] = None, + allow_target_attribute_annotation: bool = False, + allow_source_attribute_annotations: bool = False, + ) -> "DetectedDebputyParseHint": + target_attr_anno = find_annotation(anno, TargetAttribute) + if target_attr_anno: + if not allow_target_attribute_annotation: + raise ValueError( + f"The DebputyParseHint.target_attribute annotation is not allowed in this context.{error_context}" + ) + target_attribute = target_attr_anno.attribute + elif default_target_attribute is not None: + target_attribute = default_target_attribute + elif default_attribute_name is not None: + target_attribute = default_attribute_name + else: + if default_attribute_name is None: + raise ValueError( + 
"allow_target_attribute_annotation must be True OR " + "default_attribute_name/default_target_attribute must be not None" + ) + raise ValueError( + f"Missing DebputyParseHint.target_attribute annotation.{error_context}" + ) + source_attribute_anno = find_annotation(anno, ManifestAttribute) + _source_attribute_allowed( + allow_source_attribute_annotations, error_context, source_attribute_anno + ) + if source_attribute_anno: + source_attribute_name = source_attribute_anno.attribute + elif default_attribute_name is not None: + source_attribute_name = _normalize_attribute_name(default_attribute_name) + else: + source_attribute_name = None + mutual_exclusive_with_anno = find_annotation(anno, ConflictWithSourceAttribute) + if mutual_exclusive_with_anno: + _source_attribute_allowed( + allow_source_attribute_annotations, + error_context, + mutual_exclusive_with_anno, + ) + conflicting_attributes = mutual_exclusive_with_anno.conflicting_attributes + else: + conflicting_attributes = frozenset() + conditional_required = find_annotation(anno, ConditionalRequired) + + if conditional_required and is_required: + if default_attribute_name is None: + raise ValueError( + f"is_required cannot be True without default_attribute_name being not None" + ) + raise ValueError( + f'The attribute "{default_attribute_name}" is Required while also being conditionally required.' + ' Please make the attribute "NotRequired" or remove the conditional requirement.' 
+ ) + + not_path_hint_anno = find_annotation(anno, NotPathHint) + applicable_as_path_hint = not_path_hint_anno is None + + return DetectedDebputyParseHint( + target_attribute=target_attribute, + source_manifest_attribute=source_attribute_name, + conflict_with_source_attributes=conflicting_attributes, + conditional_required=conditional_required, + applicable_as_path_hint=applicable_as_path_hint, + ) + + +def _source_attribute_allowed( + source_attribute_allowed: bool, + error_context: str, + annotation: Optional[DebputyParseHint], +) -> None: + if source_attribute_allowed or annotation is None: + return + raise ValueError( + f'The annotation "{annotation}" cannot be used here. {error_context}' + ) diff --git a/src/debputy/manifest_parser/exceptions.py b/src/debputy/manifest_parser/exceptions.py new file mode 100644 index 0000000..671ec1b --- /dev/null +++ b/src/debputy/manifest_parser/exceptions.py @@ -0,0 +1,9 @@ +from debputy.exceptions import DebputyRuntimeError + + +class ManifestParseException(DebputyRuntimeError): + pass + + +class ManifestTypeException(ManifestParseException): + pass diff --git a/src/debputy/manifest_parser/mapper_code.py b/src/debputy/manifest_parser/mapper_code.py new file mode 100644 index 0000000..d7a08c3 --- /dev/null +++ b/src/debputy/manifest_parser/mapper_code.py @@ -0,0 +1,77 @@ +from typing import ( + TypeVar, + Optional, + Union, + List, + Callable, +) + +from debputy.manifest_parser.exceptions import ManifestTypeException +from debputy.manifest_parser.parser_data import ParserContextData +from debputy.manifest_parser.util import AttributePath +from debputy.packages import BinaryPackage +from debputy.util import assume_not_none + +S = TypeVar("S") +T = TypeVar("T") + + +def type_mapper_str2package( + raw_package_name: str, + ap: AttributePath, + opc: Optional[ParserContextData], +) -> BinaryPackage: + pc = assume_not_none(opc) + if "{{" in raw_package_name: + resolved_package_name = pc.substitution.substitute(raw_package_name, 
ap.path) + else: + resolved_package_name = raw_package_name + + package_name_in_message = raw_package_name + if resolved_package_name != raw_package_name: + package_name_in_message = f'"{resolved_package_name}" ["{raw_package_name}"]' + + if not pc.is_known_package(resolved_package_name): + package_names = ", ".join(pc.binary_packages) + raise ManifestTypeException( + f'The value {package_name_in_message} (from "{ap.path}") does not reference a package declared in' + f" debian/control. Valid options are: {package_names}" + ) + package_data = pc.binary_package_data(resolved_package_name) + if package_data.is_auto_generated_package: + package_names = ", ".join(pc.binary_packages) + raise ManifestTypeException( + f'The package name {package_name_in_message} (from "{ap.path}") references an auto-generated package.' + " However, auto-generated packages are now permitted here. Valid options are:" + f" {package_names}" + ) + return package_data.binary_package + + +def wrap_into_list( + x: T, + _ap: AttributePath, + _pc: Optional["ParserContextData"], +) -> List[T]: + return [x] + + +def normalize_into_list( + x: Union[T, List[T]], + _ap: AttributePath, + _pc: Optional["ParserContextData"], +) -> List[T]: + return x if isinstance(x, list) else [x] + + +def map_each_element( + mapper: Callable[[S, AttributePath, Optional["ParserContextData"]], T], +) -> Callable[[List[S], AttributePath, Optional["ParserContextData"]], List[T]]: + def _generated_mapper( + xs: List[S], + ap: AttributePath, + pc: Optional["ParserContextData"], + ) -> List[T]: + return [mapper(s, ap[i], pc) for i, s in enumerate(xs)] + + return _generated_mapper diff --git a/src/debputy/manifest_parser/parser_data.py b/src/debputy/manifest_parser/parser_data.py new file mode 100644 index 0000000..3c36815 --- /dev/null +++ b/src/debputy/manifest_parser/parser_data.py @@ -0,0 +1,133 @@ +import contextlib +from typing import ( + Iterator, + Optional, + Mapping, + NoReturn, + Union, + Any, + TYPE_CHECKING, + Tuple, 
class ParserContextData:
    """Base class describing the context made available to manifest parsers.

    Only the helpers that can be derived from ``binary_packages`` and
    ``_package_states`` are implemented here.  Every other member raises
    ``NotImplementedError`` in this class and must be supplied by a subclass.
    """

    @property
    def binary_packages(self) -> Mapping[str, BinaryPackage]:
        """Binary packages keyed by name (must be provided by a subclass)."""
        raise NotImplementedError

    @property
    def _package_states(self) -> Mapping[str, "PackageTransformationDefinition"]:
        """Per-package state keyed by name (must be provided by a subclass)."""
        raise NotImplementedError

    @property
    def is_single_binary_package(self) -> bool:
        """Whether ``binary_packages`` contains exactly one entry."""
        return len(self.binary_packages) == 1

    def single_binary_package(
        self,
        attribute_path: AttributePath,
        *,
        package_type: PackageTypeSelector = _ALL_PACKAGE_TYPES,
        package_attribute: Optional[str] = None,
    ) -> Optional[BinaryPackage]:
        """Return the only binary package of the requested type(s).

        :param attribute_path: Location of the rule being parsed; only used
          for error messages.
        :param package_type: Selector for the package types to consider.
        :param package_attribute: Name of the attribute the user could have
          used to pick a package explicitly.  When given, an ambiguous (or
          empty) match is reported as "needs the attribute".
        :return: The single matching package.  As written, this method either
          returns a package or raises; it has no path returning ``None``.
        :raises ManifestParseException: If the number of matching packages is
          not exactly one.
        """
        resolved_package_types = resolve_package_type_selectors(package_type)
        possible_matches = [
            p
            for p in self.binary_packages.values()
            if p.package_type in resolved_package_types
        ]
        if len(possible_matches) == 1:
            return possible_matches[0]

        if package_attribute is not None:
            raise ManifestParseException(
                f"The {attribute_path.path} rule needs the attribute `{package_attribute}`"
                " for this source package."
            )

        if not possible_matches:
            _package_types = ", ".join(sorted(resolved_package_types))
            raise ManifestParseException(
                f"The {attribute_path.path} rule is not applicable to this source package"
                f" (it only applies to source packages that builds exactly one of"
                f" the following package types: {_package_types})."
            )
        raise ManifestParseException(
            f"The {attribute_path.path} rule is not applicable to multi-binary packages."
        )

    def _error(self, msg: str) -> "NoReturn":
        """Abort parsing by raising ``ManifestParseException`` with ``msg``."""
        raise ManifestParseException(msg)

    def is_known_package(self, package_name: str) -> bool:
        """Whether ``package_name`` has an entry in ``_package_states``."""
        return package_name in self._package_states

    def binary_package_data(
        self,
        package_name: str,
    ) -> "PackageTransformationDefinition":
        """Look up the state recorded for ``package_name``.

        :param package_name: Name of the binary package to look up.
        :return: The matching entry of ``_package_states``.
        :raises ManifestParseException: If the package is unknown.
        """
        if package_name not in self._package_states:
            # The original message ended in the garbled phrase
            # "for one for a package"; the wording is fixed here.
            self._error(
                f'The package "{package_name}" is not present in the debian/control file (could not find'
                f' "Package: {package_name}" in a binary stanza) nor is it a -dbgsym package for'
                " a package in debian/control."
            )
        return self._package_states[package_name]

    @property
    def dpkg_architecture_variables(self) -> DpkgArchitectureBuildProcessValuesTable:
        """dpkg-architecture values table (must be provided by a subclass)."""
        raise NotImplementedError

    @property
    def dpkg_arch_query_table(self) -> DpkgArchTable:
        """dpkg architecture query table (must be provided by a subclass)."""
        raise NotImplementedError

    @property
    def build_env(self) -> DebBuildOptionsAndProfiles:
        """Build options and profiles accessor (must be provided by a subclass)."""
        raise NotImplementedError

    @contextlib.contextmanager
    def binary_package_context(
        self,
        package_name: str,
    ) -> Iterator["PackageTransformationDefinition"]:
        """Context manager scoped to one binary package (subclass hook)."""
        raise NotImplementedError

    @property
    def substitution(self) -> Substitution:
        """Substitution object in effect (must be provided by a subclass)."""
        raise NotImplementedError

    @property
    def current_binary_package_state(self) -> "PackageTransformationDefinition":
        """State of the package currently in context (subclass hook)."""
        raise NotImplementedError

    @property
    def is_in_binary_package_state(self) -> bool:
        """Whether a binary package context is active (subclass hook)."""
        raise NotImplementedError

    def dispatch_parser_table_for(self, rule_type: TTP) -> DispatchingTableParser[TP]:
        """Return the dispatching parser for ``rule_type`` (subclass hook)."""
        raise NotImplementedError
alias_mapping + + @classmethod + def root_path(cls) -> "AttributePath": + return AttributePath(None, None) + + @classmethod + def builtin_path(cls) -> "AttributePath": + return AttributePath(None, "$builtin$") + + @classmethod + def test_path(cls) -> "AttributePath": + return AttributePath(None, "$test$") + + def __bool__(self) -> bool: + return self.name is not None or self.parent is not None + + def copy_with_path_hint(self, path_hint: str) -> "AttributePath": + p = self.__class__(self.parent, self.name, alias_mapping=self.alias_mapping) + p.path_hint = path_hint + return p + + @property + def path(self) -> str: + segments = list(self._iter_path()) + segments.reverse() + parts: List[str] = [] + path_hint = None + + for s in segments: + k = s.name + s_path_hint = s.path_hint + if s_path_hint is not None: + path_hint = s_path_hint + if isinstance(k, int): + parts.append(f"[{k}]") + elif k is not None: + if parts: + parts.append(".") + parts.append(k) + if path_hint: + parts.append(f" <Search for: {path_hint}>") + if not parts: + return "document root" + return "".join(parts) + + def __str__(self) -> str: + return self.path + + def __getitem__(self, item: Union[str, int]) -> "AttributePath": + alias_mapping = None + if self.alias_mapping: + match = self.alias_mapping.get(item) + if match: + item, alias_mapping = match + if item == "": + # Support `sources[0]` mapping to `source` by `sources -> source` and `0 -> ""`. 
+ return AttributePath( + self.parent, self.name, alias_mapping=alias_mapping + ) + return AttributePath(self, item, alias_mapping=alias_mapping) + + def _iter_path(self) -> Iterator["AttributePath"]: + current = self + yield current + while True: + parent = current.parent + if not parent: + break + current = parent + yield current + + +@dataclasses.dataclass(slots=True, frozen=True) +class _SymbolicModeSegment: + base_mode: int + base_mask: int + cap_x_mode: int + cap_x_mask: int + + def apply(self, current_mode: int, is_dir: bool) -> int: + if current_mode & 0o111 or is_dir: + chosen_mode = self.cap_x_mode + mode_mask = self.cap_x_mask + else: + chosen_mode = self.base_mode + mode_mask = self.base_mask + # set ("="): mode mask clears relevant segment and current_mode are the desired bits + # add ("+"): mode mask keeps everything and current_mode are the desired bits + # remove ("-"): mode mask clears relevant bits and current_mode are 0 + return (current_mode & mode_mask) | chosen_mode + + +def _symbolic_mode_bit_inverse(v: int) -> int: + # The & part is necessary because otherwise python narrows the inversion to the minimum number of bits + # required, which is not what we want. 
+ return ~v & 0o7777 + + +def parse_symbolic_mode( + symbolic_mode: str, + attribute_path: Optional[AttributePath], +) -> Iterator[_SymbolicModeSegment]: + sticky_bit = 0o01000 + setuid_bit = 0o04000 + setgid_bit = 0o02000 + mode_group_flag = 0o7 + subject_mask_and_shift = { + "u": (mode_group_flag << 6, 6), + "g": (mode_group_flag << 3, 3), + "o": (mode_group_flag << 0, 0), + } + bits = { + "r": (0o4, 0o4), + "w": (0o2, 0o2), + "x": (0o1, 0o1), + "X": (0o0, 0o1), + "s": (0o0, 0o0), # Special-cased below (it depends on the subject) + "t": (0o0, 0o0), # Special-cased below + } + modifiers = { + "+", + "-", + "=", + } + in_path = f" in {attribute_path.path}" if attribute_path is not None else "" + for orig_part in symbolic_mode.split(","): + base_mode = 0 + cap_x_mode = 0 + part = orig_part + subjects = set() + while part and part[0] in ("u", "g", "o", "a"): + subject = part[0] + if subject == "a": + subjects = {"u", "g", "o"} + else: + subjects.add(subject) + part = part[1:] + if not subjects: + subjects = {"u", "g", "o"} + + if part and part[0] in modifiers: + modifier = part[0] + elif not part: + raise ValueError( + f'Invalid symbolic mode{in_path}: expected [+-=] to be present (from "{orig_part}")' + ) + else: + raise ValueError( + f'Invalid symbolic mode{in_path}: Expected "{part[0]}" to be one of [+-=]' + f' (from "{orig_part}")' + ) + part = part[1:] + s_bit_seen = False + t_bit_seen = False + while part and part[0] in bits: + if part == "s": + s_bit_seen = True + elif part == "t": + t_bit_seen = True + elif part in ("u", "g", "o"): + raise NotImplementedError( + f"Cannot parse symbolic mode{in_path}: Sorry, we do not support referencing an" + " existing subject's permissions (a=u) in symbolic modes." 
+ ) + else: + matched_bits = bits.get(part[0]) + if matched_bits is None: + valid_bits = "".join(bits) + raise ValueError( + f'Invalid symbolic mode{in_path}: Expected "{part[0]}" to be one of the letters' + f' in "{valid_bits}" (from "{orig_part}")' + ) + base_mode_bits, cap_x_mode_bits = bits[part[0]] + base_mode |= base_mode_bits + cap_x_mode |= cap_x_mode_bits + part = part[1:] + + if part: + raise ValueError( + f'Invalid symbolic mode{in_path}: Could not parse "{part[0]}" from "{orig_part}"' + ) + + final_base_mode = 0 + final_cap_x_mode = 0 + segment_mask = 0 + for subject in subjects: + mask, shift = subject_mask_and_shift[subject] + segment_mask |= mask + final_base_mode |= base_mode << shift + final_cap_x_mode |= cap_x_mode << shift + if modifier == "=": + segment_mask |= setuid_bit if "u" in subjects else 0 + segment_mask |= setgid_bit if "g" in subjects else 0 + segment_mask |= sticky_bit if "o" in subjects else 0 + if s_bit_seen: + if "u" in subjects: + final_base_mode |= setuid_bit + final_cap_x_mode |= setuid_bit + if "g" in subjects: + final_base_mode |= setgid_bit + final_cap_x_mode |= setgid_bit + if t_bit_seen: + final_base_mode |= sticky_bit + final_cap_x_mode |= sticky_bit + if modifier == "+": + final_base_mask = ~0 + final_cap_x_mask = ~0 + elif modifier == "-": + final_base_mask = _symbolic_mode_bit_inverse(final_base_mode) + final_cap_x_mask = _symbolic_mode_bit_inverse(final_cap_x_mode) + final_base_mode = 0 + final_cap_x_mode = 0 + elif modifier == "=": + # FIXME: Handle "unmentioned directory's setgid/setuid bits" + inverted_mask = _symbolic_mode_bit_inverse(segment_mask) + final_base_mask = inverted_mask + final_cap_x_mask = inverted_mask + else: + raise AssertionError( + f"Unknown modifier in symbolic mode: {modifier} - should not have happened" + ) + yield _SymbolicModeSegment( + base_mode=final_base_mode, + base_mask=final_base_mask, + cap_x_mode=final_cap_x_mode, + cap_x_mask=final_cap_x_mask, + ) + + +def unpack_type( + orig_type: 
def find_annotation(
    annotations: Tuple[Any, ...],
    anno_class: Type["MP"],
) -> Optional["MP"]:
    """Pick the annotation that is an instance of ``anno_class``, if any.

    :param annotations: The annotation objects to search.
    :param anno_class: The class to look for (matched with ``isinstance``).
    :return: The matching annotation, or ``None`` when there is none.
    :raises ValueError: If a second match is found after one was recorded.
    """
    found: Optional["MP"] = None
    for candidate in annotations:
        if not isinstance(candidate, anno_class):
            continue
        if found is not None:
            raise ValueError(
                f"The annotation {anno_class.__name__} was used more than once"
            )
        found = candidate
    return found
# Keyword looked for in DEB_RULES_REQUIRES_ROOT to enable root-assisted assembly.
_RRR_DEB_ASSEMBLY_KEYWORD = "debputy/deb-assembly"
# Ensures the fallback-assembly warning is emitted at most once per process.
_WARNED_ABOUT_FALLBACK_ASSEMBLY = False


def _serialize_intermediate_manifest(members: "IntermediateManifest") -> str:
    """Serialize the manifest members as a JSON array.

    :param members: Iterable of members, each offering ``to_manifest()``.
    :return: JSON text holding the ``to_manifest()`` result of every member.
    """
    serialized_members = []
    for member in members:
        serialized_members.append(member.to_manifest())
    return json.dumps(serialized_members)


def determine_assembly_method(
    package: str,
    intermediate_manifest: "IntermediateManifest",
) -> Tuple[bool, bool, List[str]]:
    """Decide how the deb for ``package`` has to be assembled.

    :param package: Name of the package (used in the error message only).
    :param intermediate_manifest: The members going into the package.
    :return: A tuple ``(needs_root, use_fallback_assembly, gain_root_cmd)``.
      When every member is owned by ``root:root`` this is
      ``(False, False, [])``.
    """
    global _WARNED_ABOUT_FALLBACK_ASSEMBLY

    # Locate the first member with non-root ownership; it is also the one
    # named in the final error message.
    non_root_member = None
    for member in intermediate_manifest:
        if member.owner != "root" or member.group != "root":
            non_root_member = member
            break
    if non_root_member is None:
        return False, False, []

    rules_requires_root = os.environ.get("DEB_RULES_REQUIRES_ROOT")
    if rules_requires_root and _RRR_DEB_ASSEMBLY_KEYWORD in rules_requires_root:
        root_cmd = os.environ.get("DEB_GAIN_ROOT_CMD")
        if not root_cmd:
            _error(
                "DEB_RULES_REQUIRES_ROOT contains a debputy keyword but DEB_GAIN_ROOT_CMD does not contain a "
                '"gain root" command'
            )
        return True, False, root_cmd.split()

    if rules_requires_root == "no":
        if not _WARNED_ABOUT_FALLBACK_ASSEMBLY:
            _warn(
                'Using internal assembly method due to "Rules-Requires-Root" being "no" and dpkg-deb assembly would'
                " require (fake)root for binary packages that needs it."
            )
            _WARNED_ABOUT_FALLBACK_ASSEMBLY = True
        return True, True, []

    _error(
        f'Due to the path "{non_root_member.member_path}" in {package}, the package assembly will require (fake)root.'
        " However, this command is not run as root nor was debputy requested to use a root command via"
        f' "Rules-Requires-Root". Please consider adding "{_RRR_DEB_ASSEMBLY_KEYWORD}" to "Rules-Requires-Root"'
        " in debian/control. Though, due to #1036865, you may have to revert to"
        ' "Rules-Requires-Root: binary-targets" depending on which version of dpkg you need to support.'
        ' Alternatively, you can set "Rules-Requires-Root: no" in debian/control and debputy will assemble'
        " the package anyway. In this case, dpkg-deb will not be used, but the output should be bit-for-bit"
        " compatible with what debputy would have produced with dpkg-deb (and root/fakeroot)."
    )
def _assemble_deb(
    package: str,
    deb_materialize_cmd: str,
    intermediate_manifest: IntermediateManifest,
    mtime: int,
    control_output_dir: str,
    output_path: str,
    upstream_args: Optional[List[str]],
    is_udeb: bool = False,
    use_fallback_assembly: bool = False,
    needs_root: bool = False,
    gain_root_cmd: Optional[Sequence[str]] = None,
) -> None:
    """Materialize one package and assemble it into a deb (or udeb).

    The materialization command receives the serialized intermediate manifest
    on stdin.  When root is not needed, materialization and assembly are done
    by that single command; otherwise a separate ``build-materialized-deb``
    command is run afterwards.

    :param package: Name of the package (used for paths and messages).
    :param deb_materialize_cmd: The materialization program to run.
    :param intermediate_manifest: Manifest of the data members.
    :param mtime: Value passed as ``--source-date-epoch``.
    :param control_output_dir: Directory holding the control files.
    :param output_path: Where the result goes.  For udebs the computed file
      name is appended to it.
    :param upstream_args: Extra arguments appended after ``--``, if any.
    :param is_udeb: Whether a udeb is being produced.
    :param use_fallback_assembly: Use the "debputy" build method when root is
      needed (instead of "dpkg-deb").
    :param needs_root: Whether the package content needs root ownership.
    :param gain_root_cmd: Command prefix used to gain root, if any.
    """
    materialization_dir = os.path.join(
        scratch_dir(), "materialization-dirs", package
    )
    ensure_dir(os.path.dirname(materialization_dir))
    assert not use_fallback_assembly or not gain_root_cmd

    # The gain-root prefix is only used when the package really needs root.
    # It is expected to be empty unless Rules-Requires-Root requested targeted
    # promotion, so no further conditions are checked here.
    materialize_cmd: List[str] = (
        list(gain_root_cmd) if needs_root and gain_root_cmd else []
    )
    materialize_cmd += [
        deb_materialize_cmd,
        "materialize-deb",
        "--intermediate-package-manifest",
        "-",
        "--may-move-control-files",
        "--may-move-data-files",
        "--source-date-epoch",
        str(mtime),
        "--discard-existing-output",
        control_output_dir,
        materialization_dir,
    ]

    if is_udeb:
        materialize_cmd.append("--udeb")
        output = os.path.join(
            output_path, compute_output_filename(control_output_dir, True)
        )
    else:
        output = output_path

    if needs_root and use_fallback_assembly:
        assembly_method = "debputy"
    else:
        assembly_method = "dpkg-deb"

    single_step = not needs_root
    if single_step:
        materialize_cmd += [
            "--build-method",
            assembly_method,
            "--assembled-deb-output",
            output,
        ]

    if upstream_args:
        materialize_cmd.append("--")
        materialize_cmd.extend(upstream_args)

    if single_step:
        print(
            f"Materializing and assembling {package} via: {escape_shell(*materialize_cmd)}"
        )
    else:
        print(f"Materializing {package} via: {escape_shell(*materialize_cmd)}")

    manifest_payload = _serialize_intermediate_manifest(intermediate_manifest)
    proc = subprocess.Popen(materialize_cmd, stdin=subprocess.PIPE)
    proc.communicate(manifest_payload.encode("utf-8"))
    if proc.returncode != 0:
        _error(f"{escape_shell(deb_materialize_cmd)} exited with a non-zero exit code!")

    if single_step:
        return

    build_materialization = [
        deb_materialize_cmd,
        "build-materialized-deb",
        materialization_dir,
        assembly_method,
        "--output",
        output,
    ]
    print(f"Assembling {package} via: {escape_shell(*build_materialization)}")
    try:
        subprocess.check_call(build_materialization)
    except subprocess.CalledProcessError as e:
        exit_code = f" with exit code {e.returncode}" if e.returncode else ""
        _error(
            f"Assembly command for {package} failed{exit_code}. Please review the output of the command"
            f" for more details on the problem."
        )
def _find_definition(
    packager_provided_files: Mapping[str, "PackagerProvidedFileClassSpec"],
    basename: str,
) -> Tuple[Optional[str], Optional["PackagerProvidedFileClassSpec"]]:
    """Find the file class definition matching ``basename``.

    An exact match on ``basename`` wins.  Otherwise the name is split at its
    periods from the right, trying ever longer suffixes as the file class and
    treating what is left in front as the custom name.

    :param packager_provided_files: Known definitions keyed by file class.
    :param basename: The name to classify (package prefix already removed).
    :return: ``(None, definition)`` for an exact match,
      ``(custom_name, definition)`` for a suffix match and ``(None, None)``
      when nothing matches.
    """
    exact_match = packager_provided_files.get(basename)
    if exact_match is not None:
        return None, exact_match

    name_part = basename
    class_part = ""
    while "." in name_part:
        name_part, _, last_segment = name_part.rpartition(".")
        class_part = f"{last_segment}.{class_part}" if class_part else last_segment
        suffix_match = packager_provided_files.get(class_part)
        if suffix_match is not None:
            return name_part, suffix_match
    return None, None
def _split_path(
    packager_provided_files: Mapping[str, PackagerProvidedFileClassSpec],
    binary_packages: Mapping[str, BinaryPackage],
    main_binary_package: str,
    max_periods_in_package_name: int,
    path: VirtualPath,
    *,
    allow_fuzzy_matches: bool = False,
) -> Iterable[PackagerProvidedFile]:
    """Interpret ``path`` as a packager provided file and yield the matches.

    A name without any period is looked up directly as a file class and is
    attributed to the main package (or to every package, when the definition
    is a fallback for all packages).  Otherwise every way of splitting the
    name into ``<package>.<rest>`` is tried, followed by the "no package
    prefix" reading; each reading that resolves to a known definition yields
    a ``PackagerProvidedFile``.

    :param packager_provided_files: Known definitions keyed by file class.
    :param binary_packages: Binary packages keyed by name.
    :param main_binary_package: Name of the package owning prefix-less files.
    :param max_periods_in_package_name: Highest number of periods in any
      package name; bounds how the name is split.
    :param path: The candidate file.
    :param allow_fuzzy_matches: Also accept ``.in`` suffixed names and the
      ``<package>@`` form (tracked as ``bug_950723``).
    :return: The matches found for ``path`` (possibly none).
    """
    owning_package_name = main_binary_package
    basename = path.name
    match_priority = 0
    if "." not in basename:
        definition = packager_provided_files.get(basename)
        if definition is None:
            return
        if definition.packageless_is_fallback_for_all_packages:
            yield from (
                PackagerProvidedFile(
                    path=path,
                    package_name=n,
                    installed_as_basename=n,
                    provided_key=".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=False,
                )
                for n in binary_packages
            )
        else:
            yield PackagerProvidedFile(
                path=path,
                package_name=owning_package_name,
                installed_as_basename=owning_package_name,
                provided_key=".UNNAMED.",
                definition=definition,
                match_priority=match_priority,
                fuzzy_match=False,
            )
        return

    for (
        owning_package_name,
        basename,
        explicit_package,
        bug_950723,
    ) in _find_package_name_prefix(
        binary_packages,
        main_binary_package,
        max_periods_in_package_name,
        path,
        allow_fuzzy_matches=allow_fuzzy_matches,
    ):
        owning_package = binary_packages[owning_package_name]
        match_priority = 1 if explicit_package else 0
        fuzzy_match = False
        # Must be reset for every candidate: it describes *this* reading of
        # the name.  It used to be initialised once before the loop, so an
        # architecture match for one candidate leaked into the next ones.
        had_arch = False

        if allow_fuzzy_matches and basename.endswith(".in") and len(basename) > 3:
            basename = basename[:-3]
            fuzzy_match = True

        if "." in basename:
            remaining, last_word = basename.rsplit(".", 1)
            # We cannot use "resolved_architecture" as it would return "all".
            if last_word == owning_package.package_deb_architecture_variable("ARCH"):
                match_priority = 3
                basename = remaining
                had_arch = True
            elif last_word == owning_package.package_deb_architecture_variable(
                "ARCH_OS"
            ):
                match_priority = 2
                basename = remaining
                had_arch = True
            elif last_word == "all" and owning_package.is_arch_all:
                # This case does not make sense, but we detect it so we can report an error
                # via _check_mismatches.
                match_priority = -1
                basename = remaining
                had_arch = True

        install_as_name, definition = _find_definition(
            packager_provided_files, basename
        )
        if definition is None:
            continue

        # Note: bug_950723 implies allow_fuzzy_matches
        if bug_950723 and not definition.bug_950723:
            continue

        _check_mismatches(
            path,
            definition,
            owning_package,
            install_as_name,
            had_arch,
        )
        if (
            definition.packageless_is_fallback_for_all_packages
            and install_as_name is None
            and not had_arch
            and not explicit_package
        ):
            yield from (
                PackagerProvidedFile(
                    path=path,
                    package_name=n,
                    installed_as_basename=f"{n}@" if bug_950723 else n,
                    # Same key scheme as the branch below: plain ".UNNAMED."
                    # normally, "@"-suffixed for the bug_950723 form.  The
                    # condition used to be inverted, which gave ordinary
                    # files the key ".UNNAMED@.".
                    provided_key=".UNNAMED.@" if bug_950723 else ".UNNAMED.",
                    definition=definition,
                    match_priority=match_priority,
                    fuzzy_match=fuzzy_match,
                )
                for n in binary_packages
            )
        else:
            provided_key = (
                install_as_name if install_as_name is not None else ".UNNAMED."
            )
            basename = (
                install_as_name if install_as_name is not None else owning_package_name
            )
            if bug_950723:
                provided_key = f"{provided_key}@"
                basename = f"{basename}@"
            yield PackagerProvidedFile(
                path=path,
                package_name=owning_package_name,
                installed_as_basename=basename,
                provided_key=provided_key,
                definition=definition,
                match_priority=match_priority,
                fuzzy_match=fuzzy_match,
            )
    return
def parse_source_debian_control(
    debian_control: "VirtualPath",
    selected_packages: Union[Set[str], FrozenSet[str]],
    excluded_packages: Union[Set[str], FrozenSet[str]],
    select_arch_all: bool,
    select_arch_any: bool,
    dpkg_architecture_variables: Optional[
        DpkgArchitectureBuildProcessValuesTable
    ] = None,
    dpkg_arch_query_table: Optional[DpkgArchTable] = None,
    build_env: Optional[DebBuildOptionsAndProfiles] = None,
) -> Tuple["SourcePackage", Dict[str, "BinaryPackage"]]:
    """Parse debian/control into a source package and its binary packages.

    :param debian_control: The debian/control file.
    :param selected_packages: Packages explicitly selected (``-p``).
    :param excluded_packages: Packages explicitly excluded (``-N``).
    :param select_arch_all: Whether arch:all packages are selected.
    :param select_arch_any: Whether architecture dependent packages are
      selected.
    :param dpkg_architecture_variables: Architecture values table; a default
      is obtained via ``dpkg_architecture_table()`` when omitted.
    :param dpkg_arch_query_table: Architecture query table; loaded via
      ``DpkgArchTable.load_arch_table()`` when omitted.
    :param build_env: Build options and profiles; defaults to
      ``DebBuildOptionsAndProfiles.instance()``.
    :return: The source package and the binary packages keyed by name.
    """
    if dpkg_architecture_variables is None:
        dpkg_architecture_variables = dpkg_architecture_table()
    if dpkg_arch_query_table is None:
        dpkg_arch_query_table = DpkgArchTable.load_arch_table()
    if build_env is None:
        build_env = DebBuildOptionsAndProfiles.instance()

    # If no selection option is set, then all packages are acted on (except the
    # excluded ones)
    if not selected_packages and not select_arch_all and not select_arch_any:
        select_arch_all = True
        select_arch_any = True

    with debian_control.open() as fd:
        dctrl_paragraphs = list(Deb822.iter_paragraphs(fd))

    if len(dctrl_paragraphs) < 2:
        _error(
            "debian/control must contain at least two stanza (1 Source + 1-N Package stanza)"
        )

    source_package = SourcePackage(dctrl_paragraphs[0])

    # Stanza numbers are 1-indexed; number 1 is the first binary stanza.
    bin_pkgs = [
        _create_binary_package(
            p,
            selected_packages,
            excluded_packages,
            select_arch_all,
            select_arch_any,
            dpkg_architecture_variables,
            dpkg_arch_query_table,
            build_env,
            i,
        )
        for i, p in enumerate(dctrl_paragraphs[1:], 1)
    ]
    bin_pkgs_table = {p.name: p for p in bin_pkgs}
    if not selected_packages.issubset(bin_pkgs_table.keys()):
        unknown = selected_packages - bin_pkgs_table.keys()
        _error(
            f"The following *selected* packages (-p) are not listed in debian/control: {sorted(unknown)}"
        )
    if not excluded_packages.issubset(bin_pkgs_table.keys()):
        # Report the unknown *excluded* names.  This used to be computed from
        # selected_packages, which listed the wrong (typically no) packages.
        unknown = excluded_packages - bin_pkgs_table.keys()
        _error(
            f"The following *excluded* packages (-N) are not listed in debian/control: {sorted(unknown)}"
        )

    return source_package, bin_pkgs_table
+ # + # Bug filed: + # https://community.sonarsource.com/t/sonarlint-python-s1940-rule-does-not-seem-to-take-set-logic-into-account/79718 + if not (provided_packages <= valid_package_names): + non_existing_packages = sorted(provided_packages - valid_package_names) + invalid_package_list = ", ".join(non_existing_packages) + msg = ( + f"Invalid package names passed to {option_name}: {invalid_package_list}: " + f'Valid package names are: {", ".join(valid_package_names)}' + ) + _error(msg) + + +def _create_binary_package( + paragraph: Union[Deb822, Dict[str, str]], + selected_packages: Union[Set[str], FrozenSet[str]], + excluded_packages: Union[Set[str], FrozenSet[str]], + select_arch_all: bool, + select_arch_any: bool, + dpkg_architecture_variables: DpkgArchitectureBuildProcessValuesTable, + dpkg_arch_query_table: DpkgArchTable, + build_env: DebBuildOptionsAndProfiles, + paragraph_index: int, +) -> "BinaryPackage": + try: + package_name = paragraph["Package"] + except KeyError: + _error(f'Missing mandatory field "Package" in stanza number {paragraph_index}') + # The raise is there to help PyCharm type-checking (which fails at "NoReturn") + raise + + for mandatory_field in _MANDATORY_BINARY_PACKAGE_FIELD: + if mandatory_field not in paragraph: + _error( + f'Missing mandatory field "{mandatory_field}" for binary package {package_name}' + f" (stanza number {paragraph_index})" + ) + + architecture = paragraph["Architecture"] + + if paragraph_index < 1: + raise ValueError("stanza index must be 1-indexed (1, 2, ...)") + is_main_package = paragraph_index == 1 + + if package_name in excluded_packages: + should_act_on = False + elif package_name in selected_packages: + should_act_on = True + elif architecture == "all": + should_act_on = select_arch_all + else: + should_act_on = select_arch_any + + profiles_raw = paragraph.get("Build-Profiles", "").strip() + if should_act_on and profiles_raw: + try: + should_act_on = active_profiles_match( + profiles_raw, 
build_env.deb_build_profiles + ) + except ValueError as e: + _error(f"Invalid Build-Profiles field for {package_name}: {e.args[0]}") + + return BinaryPackage( + paragraph, + dpkg_architecture_variables, + dpkg_arch_query_table, + should_be_acted_on=should_act_on, + is_main_package=is_main_package, + ) + + +def _check_binary_arch( + arch_table: DpkgArchTable, + binary_arch: str, + declared_arch: str, +) -> bool: + if binary_arch == "all": + return True + arch_wildcards = declared_arch.split() + for arch_wildcard in arch_wildcards: + if arch_table.matches_architecture(binary_arch, arch_wildcard): + return True + return False + + +class BinaryPackage: + __slots__ = [ + "_package_fields", + "_dbgsym_binary_package", + "_should_be_acted_on", + "_dpkg_architecture_variables", + "_declared_arch_matches_output_arch", + "_is_main_package", + "_substvars", + "_maintscript_snippets", + ] + + def __init__( + self, + fields: Union[Mapping[str, str], Deb822], + dpkg_architecture_variables: DpkgArchitectureBuildProcessValuesTable, + dpkg_arch_query: DpkgArchTable, + *, + is_main_package: bool = False, + should_be_acted_on: bool = True, + ) -> None: + super(BinaryPackage, self).__init__() + # Typing-wise, Deb822 is *not* a Mapping[str, str] but it behaves enough + # like one that we rely on it and just cast it. 
+ self._package_fields = cast("Mapping[str, str]", fields) + self._dbgsym_binary_package = None + self._should_be_acted_on = should_be_acted_on + self._dpkg_architecture_variables = dpkg_architecture_variables + self._is_main_package = is_main_package + self._declared_arch_matches_output_arch = _check_binary_arch( + dpkg_arch_query, self.resolved_architecture, self.declared_architecture + ) + + @property + def name(self) -> str: + return self.fields["Package"] + + @property + def archive_section(self) -> str: + value = self.fields.get("Section") + if value is None: + return "Unknown" + return value + + @property + def archive_component(self) -> str: + component = "" + section = self.archive_section + if "/" in section: + component = section.rsplit("/", 1)[0] + # The "main" component is always shortened to "" + if component == "main": + component = "" + return component + + @property + def is_essential(self) -> bool: + return self._package_fields.get("Essential") == "yes" + + @property + def is_udeb(self) -> bool: + return self.package_type == UDEB_PACKAGE_TYPE + + @property + def should_be_acted_on(self) -> bool: + return self._should_be_acted_on and self._declared_arch_matches_output_arch + + @property + def fields(self) -> Mapping[str, str]: + return self._package_fields + + @property + def resolved_architecture(self) -> str: + arch = self.declared_architecture + if arch == "all": + return arch + if self._x_dh_build_for_type == "target": + return self._dpkg_architecture_variables["DEB_TARGET_ARCH"] + return self._dpkg_architecture_variables.current_host_arch + + def package_deb_architecture_variable(self, variable_suffix: str) -> str: + if self._x_dh_build_for_type == "target": + return self._dpkg_architecture_variables[f"DEB_TARGET_{variable_suffix}"] + return self._dpkg_architecture_variables[f"DEB_HOST_{variable_suffix}"] + + @property + def deb_multiarch(self) -> str: + return self.package_deb_architecture_variable("MULTIARCH") + + @property + def 
class SourcePackage:
    """The source stanza of a control file, wrapped for field access"""

    __slots__ = ("_package_fields",)

    def __init__(self, fields: "Union[Mapping[str, str], Deb822]") -> None:
        # Deb822 is not formally a Mapping[str, str]; it is merely close
        # enough in behaviour, so the cast only serves the type checker.
        stanza = cast("Mapping[str, str]", fields)
        self._package_fields = stanza

    @property
    def name(self) -> str:
        """Value of the "Source" field (KeyError if the field is absent)"""
        return self._package_fields["Source"]

    @property
    def fields(self) -> Mapping[str, str]:
        """The underlying field mapping exactly as it was passed in"""
        return self._package_fields
+LINE_PREFIX = "\\\n " + + +def process_alternatives( + binary_package: BinaryPackage, + fs_root: VirtualPath, + reserved_packager_provided_files: Dict[str, List[PackagerProvidedFile]], + maintscript_snippets: Dict[str, MaintscriptSnippetContainer], +) -> None: + if binary_package.is_udeb: + return + + provided_alternatives_file = resolve_reserved_provided_file( + "alternatives", + reserved_packager_provided_files, + ) + if provided_alternatives_file is None: + return + + with provided_alternatives_file.open() as fd: + alternatives = list(Deb822.iter_paragraphs(fd)) + + for no, alternative in enumerate(alternatives): + process_alternative( + provided_alternatives_file.fs_path, + fs_root, + alternative, + no, + maintscript_snippets, + ) + + +def process_alternative( + provided_alternatives_fs_path: str, + fs_root: VirtualPath, + alternative_deb822: Deb822, + no: int, + maintscript_snippets: Dict[str, MaintscriptSnippetContainer], +) -> None: + name = _mandatory_key( + "Name", + alternative_deb822, + provided_alternatives_fs_path, + f"Stanza number {no}", + ) + error_context = f"Alternative named {name}" + link_path = _mandatory_key( + "Link", + alternative_deb822, + provided_alternatives_fs_path, + error_context, + ) + impl_path = _mandatory_key( + "Alternative", + alternative_deb822, + provided_alternatives_fs_path, + error_context, + ) + priority = _mandatory_key( + "Priority", + alternative_deb822, + provided_alternatives_fs_path, + error_context, + ) + if "/" in name: + _error( + f'The "Name" ({link_path}) key must be a basename and cannot contain slashes' + f" ({error_context} in {provided_alternatives_fs_path})" + ) + if link_path == impl_path: + _error( + f'The "Link" key and the "Alternative" key must not have the same value' + f" ({error_context} in {provided_alternatives_fs_path})" + ) + impl = fs_root.lookup(impl_path) + if impl is None or impl.is_dir: + _error( + f'The path listed in "Alternative" ("{impl_path}") does not exist' + f" in the package. 
({error_context} in {provided_alternatives_fs_path})" + ) + for key in ["Slave", "Slaves", "Slave-Links"]: + if key in alternative_deb822: + _error( + f'Please use "Dependents" instead of "{key}".' + f" ({error_context} in {provided_alternatives_fs_path})" + ) + dependents = alternative_deb822.get("Dependents") + install_command = [ + escape_shell( + "update-alternatives", + "--install", + link_path, + name, + impl_path, + priority, + ) + ] + remove_command = [ + escape_shell( + "update-alternatives", + "--remove", + link_path, + impl_path, + ) + ] + if dependents: + seen_link_path = set() + for line in dependents.splitlines(): + line = line.strip() + if not line: # First line is usually empty + continue + dlink_path, dlink_name, dimpl_path = parse_dependent_link( + line, + error_context, + provided_alternatives_fs_path, + ) + if dlink_path in seen_link_path: + _error( + f'The Dependent link path "{dlink_path}" was used twice.' + f" ({error_context} in {provided_alternatives_fs_path})" + ) + dimpl = fs_root.lookup(dimpl_path) + if dimpl is None or dimpl.is_dir: + _error( + f'The path listed in "Dependents" ("{dimpl_path}") does not exist' + f" in the package. 
({error_context} in {provided_alternatives_fs_path})" + ) + seen_link_path.add(dlink_path) + install_command.append(LINE_PREFIX) + install_command.append( + escape_shell( + # update-alternatives still uses this old option name :-/ + "--slave", + dlink_path, + dlink_name, + dimpl_path, + ) + ) + postinst = textwrap.dedent( + """\ + if {CONDITION}; then + {COMMAND} + fi + """ + ).format( + CONDITION=POSTINST_DEFAULT_CONDITION, + COMMAND=" ".join(install_command), + ) + + prerm = textwrap.dedent( + """\ + if [ "$1" = "remove" ]; then + {COMMAND} + fi + """ + ).format(COMMAND=" ".join(remove_command)) + maintscript_snippets["postinst"].append( + MaintscriptSnippet( + f"debputy (via {provided_alternatives_fs_path})", + snippet=postinst, + ) + ) + maintscript_snippets["prerm"].append( + MaintscriptSnippet( + f"debputy (via {provided_alternatives_fs_path})", + snippet=prerm, + ) + ) + + +def parse_dependent_link( + line: str, + error_context: str, + provided_alternatives_file: str, +) -> Tuple[str, str, str]: + parts = line.split() + if len(parts) != 3: + if len(parts) > 1: + pass + _error( + f"The each line in Dependents links must have exactly 3 space separated parts." + f' The "{line}" split into {len(parts)} part(s).' 
+ f" ({error_context} in {provided_alternatives_file})" + ) + + dlink_path, dlink_name, dimpl_path = parts + if "/" in dlink_name: + _error( + f'The Dependent link name "{dlink_path}" must be a basename and cannot contain slashes' + f" ({error_context} in {provided_alternatives_file})" + ) + if dlink_path == dimpl_path: + _error( + f'The Dependent Link path and Alternative must not have the same value ["{dlink_path}"]' + f" ({error_context} in {provided_alternatives_file})" + ) + return dlink_path, dlink_name, dimpl_path + + +def _mandatory_key( + key: str, + alternative_deb822: Mapping[str, str], + provided_alternatives_file: str, + error_context: str, +) -> str: + try: + return alternative_deb822[key] + except KeyError: + _error( + f'Missing mandatory key "{key}" in {provided_alternatives_file} ({error_context})' + ) diff --git a/src/debputy/packaging/debconf_templates.py b/src/debputy/packaging/debconf_templates.py new file mode 100644 index 0000000..b827763 --- /dev/null +++ b/src/debputy/packaging/debconf_templates.py @@ -0,0 +1,77 @@ +import os.path +import shutil +import subprocess +import textwrap +from typing import List, Dict + +from debputy.maintscript_snippet import MaintscriptSnippetContainer, MaintscriptSnippet +from debputy.packager_provided_files import PackagerProvidedFile +from debputy.packages import BinaryPackage +from debputy.packaging.makeshlibs import resolve_reserved_provided_file +from debputy.plugin.api.spec import FlushableSubstvars +from debputy.util import _error, escape_shell + +# Match debhelper (minus one space in each end, which comes +# via join). 
+LINE_PREFIX = "\\\n " + + +def process_debconf_templates( + binary_package: BinaryPackage, + reserved_packager_provided_files: Dict[str, List[PackagerProvidedFile]], + maintscript_snippets: Dict[str, MaintscriptSnippetContainer], + substvars: FlushableSubstvars, + control_output_dir: str, +) -> None: + provided_templates_file = resolve_reserved_provided_file( + "templates", + reserved_packager_provided_files, + ) + if provided_templates_file is None: + return + + templates_file = os.path.join(control_output_dir, "templates") + debian_dir = provided_templates_file.parent_dir + po_template_dir = debian_dir.get("po") if debian_dir is not None else None + if po_template_dir is not None and po_template_dir.is_dir: + with open(templates_file, "wb") as fd: + cmd = [ + "po2debconf", + provided_templates_file.fs_path, + ] + print(f" {escape_shell(*cmd)} > {templates_file}") + try: + subprocess.check_call( + cmd, + stdout=fd.fileno(), + ) + except subprocess.CalledProcessError: + _error( + f"Failed to generate the templates files for {binary_package.name}. Please review " + f" the output of {escape_shell('po-debconf', provided_templates_file.fs_path)}" + " to understand the issue." + ) + else: + shutil.copyfile(provided_templates_file.fs_path, templates_file) + + dependency = ( + "cdebconf-udeb" if binary_package.is_udeb else "debconf (>= 0.5) | debconf-2.0" + ) + substvars.add_dependency("misc:Depends", dependency) + if not binary_package.is_udeb: + # udebs do not have `postrm` scripts + maintscript_snippets["postrm"].append( + MaintscriptSnippet( + f"debputy (due to {provided_templates_file.fs_path})", + # FIXME: `debconf` sourcing should be an overarching feature + snippet=textwrap.dedent( + """\ + if [ "$1" = purge ] && [ -e /usr/share/debconf/confmodule ]; then + . 
# Splits a shlibs line into: optional "<type>:" prefix, library, major version
# and the dependency text.
SHLIBS_LINE_READER = re.compile(r"^(?:(\S*):)?\s*(\S+)\s*(\S+)\s*(\S.+)$")


class ShlibsContent:
    """Accumulates shlibs lines, keeping only the first entry per library

    Entries are tracked per (type, library, major version), where the type is
    "deb" or "udeb". When written, all "deb" lines precede all "udeb" lines.
    """

    def __init__(self) -> None:
        self._deb_lines: List[str] = []
        self._udeb_lines: List[str] = []
        # (type, library, major version) of every entry recorded so far
        self._seen: Set[Tuple[str, str, str]] = set()

    def add_library(
        self,
        library: str,
        major_version: str,
        dependency: str,
        *,
        udeb_dependency: Optional[str] = None,
    ) -> None:
        """Record a library unless an entry for it already exists

        :param library: Library name
        :param major_version: Major version of the library
        :param dependency: Dependency text for the "deb" entry
        :param udeb_dependency: If given, a "udeb:" entry with this dependency
          text is recorded as well.
        """
        line = f"{library} {major_version} {dependency}\n"
        seen_key = ("deb", library, major_version)
        if seen_key not in self._seen:
            self._deb_lines.append(line)
            self._seen.add(seen_key)
        if udeb_dependency is not None:
            seen_key = ("udeb", library, major_version)
            udeb_line = f"udeb: {library} {major_version} {udeb_dependency}\n"
            if seen_key not in self._seen:
                self._udeb_lines.append(udeb_line)
                self._seen.add(seen_key)

    def __bool__(self) -> bool:
        return bool(self._deb_lines) or bool(self._udeb_lines)

    def add_entries_from_shlibs_file(self, fd: IO[str]) -> None:
        """Record every entry of an existing shlibs file

        Comment lines, blank lines, lines that do not parse and entries that
        were already recorded are skipped. Accepted lines are kept verbatim.

        :param fd: The shlibs file opened for reading in text mode
        """
        for line in fd:
            if line.startswith("#") or line.isspace():
                continue
            m = SHLIBS_LINE_READER.match(line)
            if not m:
                continue
            shtype, library, major_version, dependency = m.groups()
            if shtype is None or shtype == "":
                shtype = "deb"
            seen_key = (shtype, library, major_version)
            if seen_key in self._seen:
                continue
            self._seen.add(seen_key)
            # The final line of a file may lack its newline; without one the
            # entry would be glued to whatever line is written after it.
            if not line.endswith("\n"):
                line += "\n"
            if shtype == "udeb":
                self._udeb_lines.append(line)
            else:
                self._deb_lines.append(line)

    def write_to(self, fd: IO[str]) -> None:
        """Write all "deb" lines followed by all "udeb" lines to fd"""
        fd.writelines(self._deb_lines)
        fd.writelines(self._udeb_lines)
def resolve_reserved_provided_file(
    basename: str,
    reserved_packager_provided_files: "Dict[str, List[PackagerProvidedFile]]",
) -> "Optional[VirtualPath]":
    """Look up the path of the reserved packager provided file for a basename

    :param basename: Key to look up (such as "shlibs" or "alternatives")
    :param reserved_packager_provided_files: Mapping of basename to the
      provided files registered for it; at most one entry per basename is
      expected (asserted).
    :return: The ``path`` of the sole entry, or None when the basename is
      unknown or has no entries.
    """
    candidates = reserved_packager_provided_files.get(basename)
    if candidates is None:
        return None
    assert len(candidates) < 2
    return candidates[0].path if candidates else None
matches = dir_scanned.get(p.absolute) + materialized_dir = dirs.get(p.absolute) + if matches is None: + matches = collections.defaultdict(set) + for child in p.iterdir: + if not child.is_symlink: + continue + target = _normalize_link_target(child.readlink()) + if "/" in target: + # The shlib symlinks (we are interested in) are relative to the same folder + continue + matches[target].add(child.name) + dir_scanned[p.absolute] = matches + symlinks = matches.get(elf_binary.name) + if not symlinks: + _warn( + f"Could not find any SO symlinks pointing to {elf_binary.absolute} in {pkg.name} !?" + ) + continue + if materialized_dir is None: + materialized_dir = tempfile.mkdtemp(prefix=f"{pkg.name}_", dir=root_dir) + materialized_dirs.append(materialized_dir) + dirs[p.absolute] = materialized_dir + + os.symlink(elf_binary.fs_path, os.path.join(materialized_dir, elf_binary.name)) + for link in symlinks: + os.symlink(elf_binary.name, os.path.join(materialized_dir, link)) + + +def compute_shlibs( + binary_package: BinaryPackage, + control_output_dir: str, + fs_root: VirtualPath, + manifest: "HighLevelManifest", + udeb_package_name: Optional[str], + ctrl: BinaryCtrlAccessor, + reserved_packager_provided_files: Dict[str, List[PackagerProvidedFile]], + combined_shlibs: ShlibsContent, +) -> List[SONAMEInfo]: + assert not binary_package.is_udeb + shlibs_file = os.path.join(control_output_dir, "shlibs") + need_ldconfig = False + so_files = elf_util.find_all_elf_files( + fs_root, + with_linking_type=ELF_LINKING_TYPE_DYNAMIC, + ) + sonames = extract_soname_info(binary_package, fs_root) + provided_shlibs_file = resolve_reserved_provided_file( + "shlibs", + reserved_packager_provided_files, + ) + symbols_template_file = resolve_reserved_provided_file( + "symbols", + reserved_packager_provided_files, + ) + + if provided_shlibs_file: + need_ldconfig = True + unversioned_so_seen = False + shutil.copyfile(provided_shlibs_file.fs_path, shlibs_file) + with open(shlibs_file) as fd: + 
combined_shlibs.add_entries_from_shlibs_file(fd) + else: + shlibs_file_contents, unversioned_so_seen = _compute_shlibs_content( + binary_package, + manifest, + sonames, + udeb_package_name, + combined_shlibs, + ) + + if shlibs_file_contents: + need_ldconfig = True + with open(shlibs_file, "wt", encoding="utf-8") as fd: + shlibs_file_contents.write_to(fd) + + if symbols_template_file: + symbols_file = os.path.join(control_output_dir, "symbols") + symbols_cmd = [ + "dpkg-gensymbols", + f"-p{binary_package.name}", + f"-I{symbols_template_file.fs_path}", + f"-P{control_output_dir}", + f"-O{symbols_file}", + ] + + if so_files: + symbols_cmd.extend(f"-e{x.fs_path}" for x in so_files) + print_command(*symbols_cmd) + try: + subprocess.check_call(symbols_cmd) + except subprocess.CalledProcessError as e: + # Wrap in a special error, so debputy can run the other packages. + # The kde symbols helper relies on this behaviour + raise DebputyDpkgGensymbolsError( + f"Error while running command for {binary_package.name}: {escape_shell(*symbols_cmd)}" + ) from e + + with suppress(FileNotFoundError): + st = os.stat(symbols_file) + if stat.S_ISREG(st.st_mode) and st.st_size == 0: + os.unlink(symbols_file) + elif unversioned_so_seen: + need_ldconfig = True + + if need_ldconfig: + ctrl.dpkg_trigger("activate-noawait", "ldconfig") + return sonames diff --git a/src/debputy/path_matcher.py b/src/debputy/path_matcher.py new file mode 100644 index 0000000..47e5c91 --- /dev/null +++ b/src/debputy/path_matcher.py @@ -0,0 +1,529 @@ +import fnmatch +import glob +import itertools +import os +import re +from enum import Enum +from typing import ( + Callable, + Optional, + TypeVar, + Iterable, + Union, + Sequence, + Tuple, +) + +from debputy.intermediate_manifest import PathType +from debputy.plugin.api import VirtualPath +from debputy.substitution import Substitution, NULL_SUBSTITUTION +from debputy.types import VP +from debputy.util import _normalize_path, _error, escape_shell + +MR = 
TypeVar("MR") +_GLOB_PARTS = re.compile(r"[*?]|\[]?[^]]+]") + + +def _lookup_path(fs_root: VP, path: str) -> Optional[VP]: + if not path.startswith("./"): + raise ValueError("Directory must be normalized (and not the root directory)") + if fs_root.name != "." or fs_root.parent_dir is not None: + raise ValueError("Provided fs_root must be the root directory") + # TODO: Strictly speaking, this is unsound. (E.g., FSRootDir does not return FSRootDir on a lookup) + return fs_root.lookup(path[2:]) + + +def _compile_basename_glob( + basename_glob: str, +) -> Tuple[Optional[str], Callable[[str], bool]]: + remainder = None + if not glob.has_magic(basename_glob): + return escape_shell(basename_glob), lambda x: x == basename_glob + + if basename_glob.startswith("*"): + if basename_glob.endswith("*"): + remainder = basename_glob[1:-1] + possible_quick_match = lambda x: remainder in x + escaped_pattern = "*" + escape_shell(remainder) + "*" + else: + remainder = basename_glob[1:] + possible_quick_match = lambda x: x.endswith(remainder) + escaped_pattern = "*" + escape_shell(remainder) + else: + remainder = basename_glob[:-1] + possible_quick_match = lambda x: x.startswith(remainder) + escaped_pattern = escape_shell(remainder) + "*" + + if not glob.has_magic(remainder): + return escaped_pattern, possible_quick_match + slow_pattern = re.compile(fnmatch.translate(basename_glob)) + return None, lambda x: bool(slow_pattern.match(x)) + + +def _apply_match( + fs_path: VP, + match_part: Union[Callable[[str], bool], str], +) -> Iterable[VP]: + if isinstance(match_part, str): + m = fs_path.lookup(match_part) + if m: + yield m + else: + yield from (p for p in fs_path.iterdir if match_part(p.name)) + + +class MatchRuleType(Enum): + EXACT_MATCH = "exact" + BASENAME_GLOB = "basename-glob" + DIRECT_CHILDREN_OF_DIR = "direct-children-of-dir" + ANYTHING_BENEATH_DIR = "anything-beneath-dir" + GENERIC_GLOB = "generic-glob" + MATCH_ANYTHING = "match-anything" + + +class MatchRule: + __slots__ = 
("_rule_type",) + + def __init__(self, rule_type: MatchRuleType) -> None: + self._rule_type = rule_type + + @property + def rule_type(self) -> MatchRuleType: + return self._rule_type + + def finditer( + self, + fs_root: VP, + *, + ignore_paths: Optional[Callable[[VP], bool]] = None, + ) -> Iterable[VP]: + # TODO: Strictly speaking, this is unsound. (E.g., FSRootDir does not return FSRootDir on a lookup) + raise NotImplementedError + + def _full_pattern(self) -> str: + raise NotImplementedError + + @property + def path_type(self) -> Optional[PathType]: + return None + + def describe_match_short(self) -> str: + return self._full_pattern() + + def describe_match_exact(self) -> str: + raise NotImplementedError + + def shell_escape_pattern(self) -> str: + raise TypeError("Pattern not suitable or not supported for shell escape") + + @classmethod + def recursive_beneath_directory( + cls, + directory: str, + definition_source: str, + path_type: Optional[PathType] = None, + substitution: Substitution = NULL_SUBSTITUTION, + ) -> "MatchRule": + if directory in (".", "/"): + return MATCH_ANYTHING + assert not glob.has_magic(directory) + return DirectoryBasedMatch( + MatchRuleType.ANYTHING_BENEATH_DIR, + substitution.substitute(_normalize_path(directory), definition_source), + path_type=path_type, + ) + + @classmethod + def from_path_or_glob( + cls, + path_or_glob: str, + definition_source: str, + path_type: Optional[PathType] = None, + substitution: Substitution = NULL_SUBSTITUTION, + ) -> "MatchRule": + # TODO: Handle '{a,b,c}' patterns too + # FIXME: Better error handling! + normalized_no_prefix = _normalize_path(path_or_glob, with_prefix=False) + if path_or_glob in ("*", "**/*", ".", "/"): + assert path_type is None + return MATCH_ANYTHING + + # We do not support {a,b} at the moment. This check is not perfect, but it should catch the most obvious + # unsupported usage. + if ( + "{" in path_or_glob + and ("," in path_or_glob or ".." 
in path_or_glob) + and re.search(r"[{][^},.]*(?:,|[.][.])[^},.]*[}]", path_or_glob) + ): + m = re.search(r"(.*)[{]([^},.]*(?:,|[.][.])[^},.]*[}])", path_or_glob) + assert m is not None + replacement = m.group(1) + "{{OPEN_CURLY_BRACE}}" + m.group(2) + _error( + f'The pattern "{path_or_glob}" (defined in {definition_source}) looks like it contains a' + f' brace expansion (such as "{{a,b}}" or "{{a..b}}"). Brace expansions are not supported.' + " If you wanted to match the literal path a brace in it, please use a substitution to insert" + f' the opening brace. As an example: "{replacement}"' + ) + + normalized_with_prefix = "./" + normalized_no_prefix + # TODO: Check for escapes here "foo[?]/bar" can be written as an exact match for foo?/bar + # - similar holds for "foo[?]/*" being a directory match (etc.). + if not glob.has_magic(normalized_with_prefix): + assert path_type is None + return ExactFileSystemPath( + substitution.substitute(normalized_with_prefix, definition_source) + ) + + directory = os.path.dirname(normalized_with_prefix) + basename = os.path.basename(normalized_with_prefix) + + if ("**" in directory and directory != "./**") or "**" in basename: + raise ValueError( + f'Cannot process pattern "{path_or_glob}" from {definition_source}: The double-star' + ' glob ("**") is not supported in general. Only "**/<basename-glob>" supported.' 
class MatchAnything(MatchRule):
    """Match rule that matches every path beneath the root ("**/*")"""

    def __init__(self) -> None:
        super().__init__(MatchRuleType.MATCH_ANYTHING)

    def _full_pattern(self) -> str:
        return "**/*"

    def finditer(
        self,
        fs_root: VP,
        *,
        ignore_paths: Optional[Callable[[VP], bool]] = None,
    ) -> Iterable[VP]:
        """Yield every path from fs_root.all_paths(), minus the ignored ones

        :param fs_root: The directory to enumerate
        :param ignore_paths: Optional predicate; paths for which it returns a
          true value are left out.
        """
        if ignore_paths is not None:
            yield from (p for p in fs_root.all_paths() if not ignore_paths(p))
            # Stop here: falling through would yield every path a second
            # time, including the ones that were just filtered out.
            return
        yield from fs_root.all_paths()

    def describe_match_exact(self) -> str:
        return "**/* (Match anything)"
class DirectoryBasedMatch(MatchRule):
    """Match the content of a single directory

    Depending on the rule type, the rule matches either the direct children of
    the directory or every path beneath it. The match can optionally be limited
    to one path type.
    """

    __slots__ = "_directory", "_path_type"

    def __init__(
        self,
        rule_type: MatchRuleType,
        directory: str,
        path_type: Optional[PathType] = None,
    ) -> None:
        super().__init__(rule_type)
        self._directory = directory
        self._path_type = path_type
        # Only the two directory oriented rule types are meaningful for this class.
        assert rule_type in (
            MatchRuleType.DIRECT_CHILDREN_OF_DIR,
            MatchRuleType.ANYTHING_BENEATH_DIR,
        )
        # The "/*" or "/**/*" suffix is added when describing the rule, so the
        # stored directory must not bring its own trailing slash.
        assert not self._directory.endswith("/")

    def _full_pattern(self) -> str:
        return self._directory

    def finditer(
        self,
        fs_root: VP,
        *,
        ignore_paths: Optional[Callable[[VP], bool]] = None,
    ) -> Iterable[VP]:
        """Yield the matching paths in the directory (nothing if it is not a directory)

        :param fs_root: Root used for looking up the directory of this rule.
        :param ignore_paths: Optional predicate; paths it accepts are skipped. It
          is consulted before the path type filter.
        """
        base_dir = _lookup_path(fs_root, self._directory)
        if base_dir is None or not base_dir.is_dir:
            return
        beneath = self._rule_type == MatchRuleType.ANYTHING_BENEATH_DIR
        candidates = base_dir.all_paths() if beneath else base_dir.iterdir
        for candidate in candidates:
            if ignore_paths is not None and ignore_paths(candidate):
                continue
            if self._path_type is not None and not _match_file_type(
                self._path_type, candidate
            ):
                continue
            yield candidate

    def describe_match_short(self) -> str:
        if self._path_type is None:
            type_suffix = ""
        else:
            type_suffix = f" <only for path type {self._path_type.manifest_key}>"
        if self._rule_type == MatchRuleType.ANYTHING_BENEATH_DIR:
            return f"{self._directory}/**/*{type_suffix}"
        return f"{self._directory}/*{type_suffix}"

    def describe_match_exact(self) -> str:
        if self._rule_type != MatchRuleType.ANYTHING_BENEATH_DIR:
            short_form = self.describe_match_short()
            return f"{short_form} (anything directly in the directory)"
        # Note: The recursive variant does not mention the path type restriction.
        return f"{self._directory}/**/* (anything below the directory)"

    @property
    def path_type(self) -> Optional[PathType]:
        return self._path_type

    @property
    def directory(self) -> str:
        return self._directory

    def shell_escape_pattern(self) -> str:
        if self._rule_type == MatchRuleType.ANYTHING_BENEATH_DIR:
            # The recursive variant is delegated to the parent class.
            return super().shell_escape_pattern()
        escaped_directory = escape_shell(self._directory.lstrip("."))
        return escaped_directory + "/*"
class GenericGlobImplementation(MatchRule):
    """Glob match with glob characters in directory segments (but no "**")

    The pattern is split on "/" and resolved one path segment at a time.
    """

    __slots__ = "_glob_pattern", "_path_type", "_match_parts"

    def __init__(
        self,
        glob_pattern: str,
        path_type: Optional[PathType] = None,
    ) -> None:
        super().__init__(MatchRuleType.GENERIC_GLOB)
        if glob_pattern.startswith("./"):
            glob_pattern = glob_pattern[2:]
        self._glob_pattern = glob_pattern
        self._path_type = path_type
        # Recursive globs are not supported by this implementation
        assert "**" not in glob_pattern
        # Without any glob characters, an exact match could have been used
        assert glob.has_magic(glob_pattern)
        # Without a "/", a BasenameGlob could have been used instead
        assert "/" in glob_pattern
        self._match_parts = self._compile_glob()

    def _full_pattern(self) -> str:
        return self._glob_pattern

    def finditer(self, fs_root: VP, *, ignore_paths=None) -> Iterable[VP]:
        """Yield the paths matching the glob

        :param fs_root: Root from which the first pattern segment is resolved.
        :param ignore_paths: Optional predicate; paths it accepts are skipped. It
          is consulted after the path type filter.
        """
        layer = [fs_root]
        for part in self._match_parts:
            # Each layer is built eagerly on purpose. A lazily evaluated generator
            # would look up `part` only when consumed, at which point the loop
            # variable may already refer to a later segment.
            layer = [hit for parent in layer for hit in _apply_match(parent, part)]
            if not layer:
                # No survivors; later segments cannot match anything.
                return

        for candidate in layer:
            if self._path_type is not None and not _match_file_type(
                self._path_type, candidate
            ):
                continue
            if ignore_paths is not None and ignore_paths(candidate):
                continue
            yield candidate

    def describe_match_short(self) -> str:
        pattern = self._full_pattern()
        if self._path_type is None:
            return pattern
        return f"{pattern} <only for path type {self._path_type.manifest_key}>"

    def describe_match_exact(self) -> str:
        return f"{self.describe_match_short()} (glob)"

    def _compile_glob(self) -> Sequence[Union[Callable[[str], bool], str]]:
        assert self._glob_pattern.strip("/") == self._glob_pattern
        compiled = []
        for segment in self._glob_pattern.split("/"):
            if glob.has_magic(segment):
                compiled.append(_compile_basename_glob(segment))
            else:
                # Segments without glob characters are kept as plain strings
                compiled.append(segment)
        return compiled

    def __eq__(self, other: object) -> bool:
        if isinstance(other, GenericGlobImplementation):
            return (
                self._glob_pattern == other._glob_pattern
                and self._path_type == other._path_type
            )
        return NotImplemented

    @property
    def path_type(self) -> Optional[PathType]:
        return self._path_type
def process_discard_rule_example(
    discard_rule: PluginProvidedDiscardRule,
    example: AutomaticDiscardRuleExample,
) -> ProcessedDiscardRuleExample:
    """Run a discard rule against an example and compare with the expected verdicts

    A virtual file system is built from the paths of the example. Every path in
    it gets a verdict: discarded if its parent directory was discarded, otherwise
    whatever the rule decides. Only paths with an expected verdict in the example
    are included in the rendered result.

    :param discard_rule: The rule to evaluate.
    :param example: The example providing the paths and their expected verdicts.
    :return: The rendered verdicts, the set of paths where the rule disagreed
      with the example, and the root of the virtual file system.
    """
    fs_root: VirtualPath = build_virtual_fs([path for path, _ in example.content])

    expected_by_path = {}
    for example_path, expected_verdict in example.content:
        key = "/" + _normalize_path(example_path.path_name, with_prefix=False)
        expected_by_path[key] = expected_verdict

    discarded_by_path: Dict[str, bool] = {}
    mismatches = set()
    rendered = []

    for path in fs_root.all_paths():
        parent_dir = path.parent_dir
        absolute_name = path.absolute
        # A discarded directory takes everything beneath it along; the rule is
        # not consulted for such paths.
        inherited = bool(parent_dir and discarded_by_path[parent_dir.absolute])
        verdict = True if inherited else discard_rule.should_discard(path)
        discarded_by_path[absolute_name] = verdict

        expected = expected_by_path.get(absolute_name)
        if expected is None:
            # The example has no expectation for this path; nothing to render
            continue

        if expected != verdict:
            mismatches.add(path)
            if verdict:
                verdict_code = DiscardVerdict.INCONSISTENT_CODE_DISCARDED
            else:
                verdict_code = DiscardVerdict.INCONSISTENT_CODE_KEPT
        elif not verdict:
            verdict_code = DiscardVerdict.KEPT
        elif inherited:
            verdict_code = DiscardVerdict.DISCARDED_BY_DIRECTORY
        else:
            verdict_code = DiscardVerdict.DISCARDED_BY_CODE
        rendered.append((path, verdict_code))

    return ProcessedDiscardRuleExample(rendered, mismatches, fs_root)
SUPPORTED_DISPATCHABLE_OBJECT_PARSERS.items() + } + ) + ) + manifest_variables: Dict[str, PluginProvidedManifestVariable] = dataclasses.field( + default_factory=dict + ) + all_package_processors: Dict[Tuple[str, str], PluginProvidedPackageProcessor] = ( + dataclasses.field(default_factory=dict) + ) + auto_discard_rules: Dict[str, PluginProvidedDiscardRule] = dataclasses.field( + default_factory=dict + ) + service_managers: Dict[str, ServiceManagerDetails] = dataclasses.field( + default_factory=dict + ) + known_packaging_files: Dict[str, PluginProvidedKnownPackagingFile] = ( + dataclasses.field(default_factory=dict) + ) + mapped_types: Dict[Any, PluginProvidedTypeMapping] = dataclasses.field( + default_factory=dict + ) + manifest_parser_generator: ParserGenerator = dataclasses.field( + default_factory=ParserGenerator + ) + + def package_processors_in_order(self) -> Sequence[PluginProvidedPackageProcessor]: + order = [] + delayed = [] + for plugin_processor in self.all_package_processors.values(): + if not plugin_processor.dependencies: + order.append(plugin_processor) + else: + delayed.append(plugin_processor) + + # At the time of writing, insert order will work as a plugin cannot declare + # dependencies out of order in the current version. However, we want to + # ensure dependencies are taken a bit seriously, so we ensure that processors + # without dependencies are run first. This should weed out anything that + # needs dependencies but do not add them. + # + # It is still far from as any dependency issues will be hidden if you just + # add a single dependency. 
+ order.extend(delayed) + return order diff --git a/src/debputy/plugin/api/impl.py b/src/debputy/plugin/api/impl.py new file mode 100644 index 0000000..e25713f --- /dev/null +++ b/src/debputy/plugin/api/impl.py @@ -0,0 +1,1926 @@ +import contextlib +import dataclasses +import functools +import importlib +import importlib.util +import itertools +import json +import os +import re +import subprocess +import sys +from abc import ABC +from json import JSONDecodeError +from typing import ( + Optional, + Callable, + Dict, + Tuple, + Iterable, + Sequence, + Type, + List, + Union, + Set, + Iterator, + IO, + Mapping, + AbstractSet, + cast, + FrozenSet, + Any, +) + +from debputy import DEBPUTY_DOC_ROOT_DIR +from debputy.exceptions import ( + DebputySubstitutionError, + PluginConflictError, + PluginMetadataError, + PluginBaseError, + PluginInitializationError, + PluginAPIViolationError, + PluginNotFoundError, +) +from debputy.maintscript_snippet import ( + STD_CONTROL_SCRIPTS, + MaintscriptSnippetContainer, + MaintscriptSnippet, +) +from debputy.manifest_parser.base_types import TypeMapping +from debputy.manifest_parser.exceptions import ManifestParseException +from debputy.manifest_parser.parser_data import ParserContextData +from debputy.manifest_parser.util import AttributePath +from debputy.plugin.api.feature_set import PluginProvidedFeatureSet +from debputy.plugin.api.impl_types import ( + DebputyPluginMetadata, + PackagerProvidedFileClassSpec, + MetadataOrMaintscriptDetector, + PluginProvidedTrigger, + TTP, + DIPHandler, + PF, + SF, + DIPKWHandler, + PluginProvidedManifestVariable, + PluginProvidedPackageProcessor, + PluginProvidedDiscardRule, + AutomaticDiscardRuleExample, + PPFFormatParam, + ServiceManagerDetails, + resolve_package_type_selectors, + KnownPackagingFileInfo, + PluginProvidedKnownPackagingFile, + InstallPatternDHCompatRule, + PluginProvidedTypeMapping, +) +from debputy.plugin.api.plugin_parser import ( + PLUGIN_METADATA_PARSER, + PluginJsonMetadata, + 
def _validate_known_packaging_file_dh_compat_rules(
    dh_compat_rules: Optional[List[InstallPatternDHCompatRule]],
) -> None:
    """Sanity check the dh compat rules declared for a known packaging file

    :param dh_compat_rules: The rules in their declared order. `None` or an empty
      list is accepted without further checks.
    :raises ValueError: If a rule consists of nothing but selectors, if a rule that
      is not the last one has neither selector, if a compat level is negative, if
      the compat levels are not strictly decreasing, or if an install pattern is
      not in its normalized form.
    """
    max_compat = None
    if not dh_compat_rules:
        return
    dh_compat_rule: InstallPatternDHCompatRule
    for idx, dh_compat_rule in enumerate(dh_compat_rules):
        dh_version = dh_compat_rule.get("starting_with_debhelper_version")
        compat = dh_compat_rule.get("starting_with_compat_level")

        # The two selectors only say *when* the rule applies. A rule must have at
        # least one other key to have any effect. The key names here must be the
        # same as the ones read above, or a selector-only rule slips through.
        remaining = dh_compat_rule.keys() - {
            "starting_with_debhelper_version",
            "starting_with_compat_level",
        }
        if not remaining:
            raise ValueError(
                f"The dh compat-rule at index {idx} does not affect anything not have any rules!? So why have it?"
            )
        # Only the last rule may act as the unconditional fallback
        if dh_version is None and compat is None and idx < len(dh_compat_rules) - 1:
            raise ValueError(
                f"The dh compat-rule at index {idx} is not the last and is missing either"
                " before-debhelper-version or before-compat-level"
            )
        if compat is not None and compat < 0:
            raise ValueError(
                f"There is no compat below 1 but dh compat-rule at {idx} wants to declare some rule"
                f" for something that appeared when migrating from {compat} to {compat + 1}."
            )

        # Rules must be listed with strictly decreasing compat levels
        if max_compat is None:
            max_compat = compat
        elif compat is not None:
            if compat >= max_compat:
                raise ValueError(
                    f"The dh compat-rule at {idx} should be moved earlier than the entry for compat {max_compat}."
                )
            max_compat = compat

        install_pattern = dh_compat_rule.get("install_pattern")
        if (
            install_pattern is not None
            and _normalize_path(install_pattern, with_prefix=False) != install_pattern
        ):
            raise ValueError(
                f"The install-pattern in dh compat-rule at {idx} must be normalized as"
                f' "{_normalize_path(install_pattern, with_prefix=False)}".'
            )
+ ) from e + elif isinstance(e, PluginBaseError): + raise + raise PluginInitializationError( + f"Exception while attempting to load plugin {metadata.plugin_name}" + ) from e + + def packager_provided_file( + self, + stem: str, + installed_path: str, + *, + default_mode: int = 0o0644, + default_priority: Optional[int] = None, + allow_name_segment: bool = True, + allow_architecture_segment: bool = False, + post_formatting_rewrite: Optional[Callable[[str], str]] = None, + packageless_is_fallback_for_all_packages: bool = False, + reservation_only: bool = False, + format_callback: Optional[ + Callable[[str, PPFFormatParam, VirtualPath], str] + ] = None, + reference_documentation: Optional[ + PackagerProvidedFileReferenceDocumentation + ] = None, + ) -> None: + packager_provided_files = self._feature_set.packager_provided_files + existing = packager_provided_files.get(stem) + + if format_callback is not None and self._plugin_name != "debputy": + raise ValueError( + "Sorry; Using format_callback is a debputy-internal" + f" API. Triggered by plugin {self._plugin_name}" + ) + + if installed_path.endswith("/"): + raise ValueError( + f'The installed_path ends with "/" indicating it is a directory, but it must be a file.' + f" Triggered by plugin {self._plugin_name}." + ) + + installed_path = _normalize_path(installed_path) + + has_name_var = "{name}" in installed_path + + if installed_path.startswith("./DEBIAN") or reservation_only: + # Special-case, used for control files. + if self._plugin_name != "debputy": + raise ValueError( + "Sorry; Using DEBIAN as install path or/and reservation_only is a debputy-internal" + f" API. Triggered by plugin {self._plugin_name}" + ) + elif not has_name_var and "{owning_package}" not in installed_path: + raise ValueError( + 'The installed_path must contain a "{name}" (preferred) or a "{owning_package}"' + " substitution (or have installed_path end with a slash). Otherwise, the installed" + f" path would caused file-conflicts. 
Triggered by plugin {self._plugin_name}" + ) + + if allow_name_segment and not has_name_var: + raise ValueError( + 'When allow_name_segment is True, the installed_path must have a "{name}" substitution' + " variable. Otherwise, the name segment will not work properly. Triggered by" + f" plugin {self._plugin_name}" + ) + + if ( + default_priority is not None + and "{priority}" not in installed_path + and "{priority:02}" not in installed_path + ): + raise ValueError( + 'When default_priority is not None, the installed_path should have a "{priority}"' + ' or a "{priority:02}" substitution variable. Otherwise, the priority would be lost.' + f" Triggered by plugin {self._plugin_name}" + ) + + if existing is not None: + if existing.debputy_plugin_metadata.plugin_name != self._plugin_name: + message = ( + f'The stem "{stem}" is registered twice for packager provided files.' + f" Once by {existing.debputy_plugin_metadata.plugin_name} and once" + f" by {self._plugin_name}" + ) + else: + message = ( + f"Bug in the plugin {self._plugin_name}: It tried to register the" + f' stem "{stem}" twice for packager provided files.' 
+ ) + raise PluginConflictError( + message, existing.debputy_plugin_metadata, self._plugin_metadata + ) + packager_provided_files[stem] = PackagerProvidedFileClassSpec( + self._plugin_metadata, + stem, + installed_path, + default_mode=default_mode, + default_priority=default_priority, + allow_name_segment=allow_name_segment, + allow_architecture_segment=allow_architecture_segment, + post_formatting_rewrite=post_formatting_rewrite, + packageless_is_fallback_for_all_packages=packageless_is_fallback_for_all_packages, + reservation_only=reservation_only, + formatting_callback=format_callback, + reference_documentation=reference_documentation, + ) + + def _unload() -> None: + del packager_provided_files[stem] + + self._unloaders.append(_unload) + + def metadata_or_maintscript_detector( + self, + auto_detector_id: str, + auto_detector: MetadataAutoDetector, + *, + package_type: PackageTypeSelector = "deb", + ) -> None: + if auto_detector_id in self._plugin_detector_ids: + raise ValueError( + f"The plugin {self._plugin_name} tried to register" + f' "{auto_detector_id}" twice' + ) + self._plugin_detector_ids.add(auto_detector_id) + all_detectors = self._feature_set.metadata_maintscript_detectors + if self._plugin_name not in all_detectors: + all_detectors[self._plugin_name] = [] + package_types = resolve_package_type_selectors(package_type) + all_detectors[self._plugin_name].append( + MetadataOrMaintscriptDetector( + detector_id=auto_detector_id, + detector=auto_detector, + plugin_metadata=self._plugin_metadata, + applies_to_package_types=package_types, + enabled=True, + ) + ) + + def _unload() -> None: + if self._plugin_name in all_detectors: + del all_detectors[self._plugin_name] + + self._unloaders.append(_unload) + + def document_builtin_variable( + self, + variable_name: str, + variable_reference_documentation: str, + *, + is_context_specific: bool = False, + is_for_special_case: bool = False, + ) -> None: + manifest_variables = self._feature_set.manifest_variables + 
self._restricted_api() + state = self._substitution.variable_state(variable_name) + if state == VariableNameState.UNDEFINED: + raise ValueError( + f"The plugin {self._plugin_name} attempted to document built-in {variable_name}," + f" but it is not known to be a variable" + ) + + assert variable_name not in manifest_variables + + manifest_variables[variable_name] = PluginProvidedManifestVariable( + self._plugin_metadata, + variable_name, + None, + is_context_specific_variable=is_context_specific, + variable_reference_documentation=variable_reference_documentation, + is_documentation_placeholder=True, + is_for_special_case=is_for_special_case, + ) + + def _unload() -> None: + del manifest_variables[variable_name] + + self._unloaders.append(_unload) + + def manifest_variable_provider( + self, + provider: Callable[[VariableContext], Mapping[str, str]], + variables: Union[Sequence[str], Mapping[str, Optional[str]]], + ) -> None: + self._restricted_api() + cached_provider = functools.lru_cache(None)(provider) + permitted_variables = frozenset(variables) + variables_iter: Iterable[Tuple[str, Optional[str]]] + if not isinstance(variables, Mapping): + variables_iter = zip(variables, itertools.repeat(None)) + else: + variables_iter = variables.items() + + checked_vars = False + manifest_variables = self._feature_set.manifest_variables + plugin_name = self._plugin_name + + def _value_resolver_generator( + variable_name: str, + ) -> Callable[[VariableContext], str]: + def _value_resolver(variable_context: VariableContext) -> str: + res = cached_provider(variable_context) + nonlocal checked_vars + if not checked_vars: + if permitted_variables != res.keys(): + expected = ", ".join(sorted(permitted_variables)) + actual = ", ".join(sorted(res)) + raise PluginAPIViolationError( + f"The plugin {plugin_name} claimed to provide" + f" the following variables {expected}," + f" but when resolving the variables, the plugin provided" + f" {actual}. 
These two lists should have been the same." + ) + checked_vars = False + return res[variable_name] + + return _value_resolver + + for varname, vardoc in variables_iter: + self._check_variable_name(varname) + manifest_variables[varname] = PluginProvidedManifestVariable( + self._plugin_metadata, + varname, + _value_resolver_generator(varname), + is_context_specific_variable=False, + variable_reference_documentation=vardoc, + ) + + def _unload() -> None: + raise PluginInitializationError( + "Cannot unload manifest_variable_provider (not implemented)" + ) + + self._unloaders.append(_unload) + + def _check_variable_name(self, variable_name: str) -> None: + manifest_variables = self._feature_set.manifest_variables + existing = manifest_variables.get(variable_name) + + if existing is not None: + if existing.plugin_metadata.plugin_name == self._plugin_name: + message = ( + f"Bug in the plugin {self._plugin_name}: It tried to register the" + f' manifest variable "{variable_name}" twice.' + ) + else: + message = ( + f"The plugins {existing.plugin_metadata.plugin_name} and {self._plugin_name}" + f" both tried to provide the manifest variable {variable_name}" + ) + raise PluginConflictError( + message, existing.plugin_metadata, self._plugin_metadata + ) + if not SUBST_VAR_RE.match("{{" + variable_name + "}}"): + raise ValueError( + f"The plugin {self._plugin_name} attempted to declare {variable_name}," + f" which is not a valid variable name" + ) + + namespace = "" + variable_basename = variable_name + if ":" in variable_name: + namespace, variable_basename = variable_name.rsplit(":", 1) + assert namespace != "" + assert variable_name != "" + + if namespace != "" and namespace not in ("token", "path"): + raise ValueError( + f"The plugin {self._plugin_name} attempted to declare {variable_name}," + f" which is in the reserved namespace {namespace}" + ) + + variable_name_upper = variable_name.upper() + if ( + variable_name_upper.startswith(("DEB_", "DPKG_", "DEBPUTY")) + or 
variable_basename.startswith("_") + or variable_basename.upper().startswith("DEBPUTY") + ) and self._plugin_name != "debputy": + raise ValueError( + f"The plugin {self._plugin_name} attempted to declare {variable_name}," + f" which is a variable name reserved by debputy" + ) + + state = self._substitution.variable_state(variable_name) + if state != VariableNameState.UNDEFINED and self._plugin_name != "debputy": + raise ValueError( + f"The plugin {self._plugin_name} attempted to declare {variable_name}," + f" which would shadow a built-in variable" + ) + + def package_processor( + self, + processor_id: str, + processor: PackageProcessor, + *, + depends_on_processor: Iterable[str] = tuple(), + package_type: PackageTypeSelector = "deb", + ) -> None: + self._restricted_api(allowed_plugins={"lua"}) + package_processors = self._feature_set.all_package_processors + dependencies = set() + processor_key = (self._plugin_name, processor_id) + + if processor_key in package_processors: + raise PluginConflictError( + f"The plugin {self._plugin_name} already registered a processor with id {processor_id}", + self._plugin_metadata, + self._plugin_metadata, + ) + + for depends_ref in depends_on_processor: + if isinstance(depends_ref, str): + if (self._plugin_name, depends_ref) in package_processors: + depends_key = (self._plugin_name, depends_ref) + elif ("debputy", depends_ref) in package_processors: + depends_key = ("debputy", depends_ref) + else: + raise ValueError( + f'Could not resolve dependency "{depends_ref}" for' + f' "{processor_id}". It was not provided by the plugin itself' + f" ({self._plugin_name}) nor debputy." + ) + else: + # TODO: Add proper dependencies first, at which point we should probably resolve "name" + # via the direct dependencies. + assert False + + existing_processor = package_processors.get(depends_key) + if existing_processor is None: + # We currently require the processor to be declared already. 
def automatic_discard_rule(
    self,
    name: str,
    should_discard: Callable[[VirtualPath], bool],
    *,
    rule_reference_documentation: Optional[str] = None,
    examples: Union[
        AutomaticDiscardRuleExample, Sequence[AutomaticDiscardRuleExample]
    ] = tuple(),
) -> None:
    """Register an automatic discard rule

    Automatic discard rules are consulted for *every* path that is about to be
    installed into any package. As soon as one rule says a path should be
    discarded, the path is not installed. What that means depends on the path:

     * directory: The directory and everything beneath it is excluded.
     * symlink: The symlink itself (but not its target) is excluded.
     * hardlink: This hardlink is not installed, but other instances of it are.

    Note: `debputy` *never* deletes discarded files. It merely skips them.

    Write discard rules under the assumption that a directory is tested before
    its content *when it is relevant* for the rule to decide whether the whole
    directory can be excluded.

    The packager can overrule automatic discard rules via the manifest by
    listing the path explicitly without any globs. For example:

        installations:
        - install:
            sources:
            - usr/lib/libfoo.la  # <-- This path is always installed
                                 #     (Discard rules are never asked in this case)
                                 #
            - usr/lib/*.so*      # <-- Discard rules apply to any path beneath usr/lib and can exclude matches.
                                 #     They will not examine `libfoo.la`, as it has already been installed.
                                 #
                                 #     Note: usr/lib itself is never tested in this case (it is assumed to be
                                 #     explicitly requested). But any subdir of usr/lib will be examined.

    While a rule is being evaluated, it sees the source path currently being
    considered for installation. It may look at "surrounding" context (such as
    the parent directory), but it cannot know whether those paths are going to
    be installed.

    :param name: The user visible name of the discard rule. It can be used on the command line, so avoid
      shell metacharacters and spaces.
    :param should_discard: The callable implementing the rule. It receives a VirtualPath representing the
      *source* path about to be installed. Returning `True` discards the path; returning `False` means the
      path is not discarded (by this rule at least). A source path is either relative to the root of the
      source tree or to the root of a search directory such as `debian/tmp`. The install destination is
      not available when the rule is evaluated.
    :param rule_reference_documentation: Optionally, the reference documentation shown when a user looks
      up this automatic discard rule.
    :param examples: Examples for the rule. Use the automatic_discard_rule_example function to generate
      them. A single example or a sequence of examples is accepted.
    """
    self._restricted_api()
    registry = self._feature_set.auto_discard_rules
    existing = registry.get(name)
    if existing is not None:
        # Distinguish a plugin registering the rule twice from two plugins
        # competing for the same rule name.
        same_plugin = existing.plugin_metadata.plugin_name == self._plugin_name
        message = (
            (
                f"Bug in the plugin {self._plugin_name}: It tried to register the"
                f' automatic discard rule "{name}" twice.'
            )
            if same_plugin
            else (
                f"The plugins {existing.plugin_metadata.plugin_name} and {self._plugin_name}"
                f" both tried to provide the automatic discard rule {name}"
            )
        )
        raise PluginConflictError(
            message, existing.plugin_metadata, self._plugin_metadata
        )

    # Normalize "one example" and "many examples" into a tuple.
    if isinstance(examples, AutomaticDiscardRuleExample):
        examples = (examples,)
    else:
        examples = tuple(examples)

    registry[name] = PluginProvidedDiscardRule(
        name,
        self._plugin_metadata,
        should_discard,
        rule_reference_documentation,
        examples,
    )

    def _unload() -> None:
        del registry[name]

    self._unloaders.append(_unload)
def manifest_variable(
    self,
    variable_name: str,
    value: str,
    variable_reference_documentation: Optional[str] = None,
) -> None:
    """Register a manifest variable with a fixed value.

    The name is validated first. The value is then run through the
    substitution engine; it is only accepted when substitution leaves it
    unchanged, i.e. it does not reference another variable.

    :param variable_name: Name of the manifest variable to declare.
    :param value: The literal value of the variable.
    :param variable_reference_documentation: Optional reference documentation
      for the variable.
    :raises ValueError: If the value depends on another variable (or the
      substitution of it fails).
    """
    self._check_variable_name(variable_name)
    registry = self._feature_set.manifest_variables
    try:
        expanded = self._substitution.substitute(value, "Plugin initialization")
    except DebputySubstitutionError:
        # A failing substitution means the value referenced something.
        is_literal = False
    else:
        is_literal = expanded == value

    if not is_literal:
        raise ValueError(
            f"The plugin {self._plugin_name} attempted to declare {variable_name} with value {value!r}."
            f" This value depends on another variable, which is not supported. This restriction may be"
            f" lifted in the future."
        )

    registry[variable_name] = PluginProvidedManifestVariable(
        self._plugin_metadata,
        variable_name,
        value,
        is_context_specific_variable=False,
        variable_reference_documentation=variable_reference_documentation,
    )

    def _unload() -> None:
        # We need to check it was never resolved
        raise PluginInitializationError(
            "Cannot unload manifest_variable (not implemented)"
        )

    self._unloaders.append(_unload)
f"The rule_type was not a supported type. It must be one of {types}" + ) + if object_parser_key not in dispatchable_object_parsers: + types = ", ".join(sorted(dispatchable_object_parsers)) + raise ValueError( + f"The object_parser_key was not a supported type. It must be one of {types}" + ) + parent_dispatcher = dispatchable_object_parsers[rule_type] + child_dispatcher = dispatchable_object_parsers[object_parser_key] + parent_dispatcher.register_child_parser( + rule_name, + child_dispatcher, + self._plugin_metadata, + on_end_parse_step=on_end_parse_step, + ) + + def _unload() -> None: + raise PluginInitializationError( + "Cannot unload plugable_object_parser (not implemented)" + ) + + self._unloaders.append(_unload) + + def plugable_manifest_rule( + self, + rule_type: Union[TTP, str], + rule_name: Union[str, List[str]], + parsed_format: Type[PF], + handler: DIPHandler, + *, + source_format: Optional[SF] = None, + inline_reference_documentation: Optional[ParserDocumentation] = None, + ) -> None: + self._restricted_api() + feature_set = self._feature_set + if isinstance(rule_type, str): + if rule_type not in feature_set.dispatchable_object_parsers: + types = ", ".join(sorted(feature_set.dispatchable_object_parsers)) + raise ValueError( + f"The rule_type was not a supported type. It must be one of {types}" + ) + dispatching_parser = feature_set.dispatchable_object_parsers[rule_type] + else: + if rule_type not in feature_set.dispatchable_table_parsers: + types = ", ".join( + sorted(x.__name__ for x in feature_set.dispatchable_table_parsers) + ) + raise ValueError( + f"The rule_type was not a supported type. 
It must be one of {types}" + ) + dispatching_parser = feature_set.dispatchable_table_parsers[rule_type] + + parser = feature_set.manifest_parser_generator.parser_from_typed_dict( + parsed_format, + source_content=source_format, + inline_reference_documentation=inline_reference_documentation, + ) + dispatching_parser.register_parser( + rule_name, + parser, + handler, + self._plugin_metadata, + ) + + def _unload() -> None: + raise PluginInitializationError( + "Cannot unload plugable_manifest_rule (not implemented)" + ) + + self._unloaders.append(_unload) + + def known_packaging_files( + self, + packaging_file_details: KnownPackagingFileInfo, + ) -> None: + known_packaging_files = self._feature_set.known_packaging_files + detection_method = packaging_file_details.get( + "detection_method", cast("Literal['path']", "path") + ) + path = packaging_file_details.get("path") + dhpkgfile = packaging_file_details.get("pkgfile") + + packaging_file_details: KnownPackagingFileInfo = packaging_file_details.copy() + + if detection_method == "path": + if dhpkgfile is not None: + raise ValueError( + 'The "pkgfile" attribute cannot be used when detection-method is "path" (or omitted)' + ) + if path != _normalize_path(path, with_prefix=False): + raise ValueError( + f"The path for known packaging files must be normalized. 
def register_mapped_type(
    self,
    type_mapping: TypeMapping,
    *,
    reference_documentation: Optional[TypeMappingDocumentation] = None,
) -> None:
    """Register a type mapping for use by the manifest parser generator.

    The mapping is keyed by ``type_mapping.target_type``; each target type
    can be registered only once across all plugins.

    This API is restricted (see ``_restricted_api``).

    :param type_mapping: The mapping to register.
    :param reference_documentation: Optional documentation for the mapping.
    :raises PluginConflictError: If the target type is already registered.
    """
    self._restricted_api()
    target_type = type_mapping.target_type
    mapped_types = self._feature_set.mapped_types
    existing = mapped_types.get(target_type)
    if existing is not None:
        # These messages previously said "known packaging files" (copied
        # from that registration method), which pointed at the wrong feature.
        if existing.plugin_metadata.plugin_name != self._plugin_name:
            message = (
                f'The key "{target_type.__name__}" is registered twice for mapped types.'
                f" Once by {existing.plugin_metadata.plugin_name} and once by {self._plugin_name}"
            )
        else:
            message = (
                f"Bug in the plugin {self._plugin_name}: It tried to register the"
                f' key "{target_type.__name__}" twice for mapped types.'
            )
        raise PluginConflictError(
            message, existing.plugin_metadata, self._plugin_metadata
        )
    parser_generator = self._feature_set.manifest_parser_generator
    mapped_types[target_type] = PluginProvidedTypeMapping(
        type_mapping, reference_documentation, self._plugin_metadata
    )
    parser_generator.register_mapped_type(type_mapping)
class MaintscriptAccessorProviderBase(MaintscriptAccessor, ABC):
    """Shared implementation of the maintscript hook methods.

    Each ``on_*`` method wraps the snippet in a shell ``if``-block with the
    condition for that hook and passes it to ``_append_script``, which
    subclasses must implement.
    """

    __slots__ = ()

    def _append_script(
        self,
        caller_name: str,
        maintscript: Maintscript,
        full_script: str,
        /,
        perform_substitution: bool = True,
    ) -> None:
        """Record ``full_script`` for ``maintscript``; implemented by subclasses."""
        raise NotImplementedError

    @classmethod
    def _apply_condition_to_script(
        cls,
        condition: str,
        run_snippet: str,
        /,
        indent: Optional[bool] = None,
    ) -> str:
        """Wrap ``run_snippet`` in ``if <condition>; then ... fi``.

        :param condition: Shell condition placed after ``if``.
        :param run_snippet: Shell code to run when the condition holds.
        :param indent: Whether to indent the snippet. When ``None``, the
          snippet is indented unless it contains ``<<`` (heredocs must not
          be re-indented).
        :return: The wrapped snippet, always ending with a newline.
        """
        if indent is None:
            # We auto-determine this based on heredocs currently
            indent = "<<" not in run_snippet

        if indent:
            # NOTE(review): the indent width is taken from the visible text,
            # where whitespace appears collapsed - confirm against the real file.
            run_snippet = "".join(" " + x for x in run_snippet.splitlines(True))
        if not run_snippet.endswith("\n"):
            run_snippet += "\n"
        condition_line = f"if {condition}; then\n"
        end_line = "fi\n"
        return "".join((condition_line, run_snippet, end_line))

    def _append_conditional_snippet(
        self,
        caller_name: str,
        maintscript: Maintscript,
        condition: str,
        run_snippet: str,
        indent: Optional[bool],
        perform_substitution: bool,
    ) -> None:
        """Guard ``run_snippet`` with ``condition`` and append it to ``maintscript``."""
        return self._append_script(
            caller_name,
            maintscript,
            self._apply_condition_to_script(condition, run_snippet, indent=indent),
            perform_substitution=perform_substitution,
        )

    def on_configure(
        self,
        run_snippet: str,
        /,
        indent: Optional[bool] = None,
        perform_substitution: bool = True,
        skip_on_rollback: bool = False,
    ) -> None:
        """Run the snippet in ``postinst`` under the default postinst condition.

        With ``skip_on_rollback``, the condition is narrowed to
        ``"$1" = "configure"`` only.
        """
        condition = POSTINST_DEFAULT_CONDITION
        if skip_on_rollback:
            condition = '[ "$1" = "configure" ]'
        return self._append_conditional_snippet(
            "on_configure",
            "postinst",
            condition,
            run_snippet,
            indent,
            perform_substitution,
        )

    def on_initial_install(
        self,
        run_snippet: str,
        /,
        indent: Optional[bool] = None,
        perform_substitution: bool = True,
    ) -> None:
        """Run the snippet in ``postinst`` on configure when ``$2`` is empty."""
        return self._append_conditional_snippet(
            "on_initial_install",
            "postinst",
            '[ "$1" = "configure" -a -z "$2" ]',
            run_snippet,
            indent,
            perform_substitution,
        )

    def on_upgrade(
        self,
        run_snippet: str,
        /,
        indent: Optional[bool] = None,
        perform_substitution: bool = True,
    ) -> None:
        """Run the snippet in ``postinst`` on configure when ``$2`` is non-empty."""
        return self._append_conditional_snippet(
            "on_upgrade",
            "postinst",
            '[ "$1" = "configure" -a -n "$2" ]',
            run_snippet,
            indent,
            perform_substitution,
        )

    def on_upgrade_from(
        self,
        version: str,
        run_snippet: str,
        /,
        indent: Optional[bool] = None,
        perform_substitution: bool = True,
    ) -> None:
        """Run the snippet in ``postinst`` when upgrading from ``version`` or earlier.

        The generated condition is
        ``dpkg --compare-versions "$2" le-nl <version>``.

        :param version: The version to compare the previously configured
          version (``$2``) against.
        """
        import shlex

        # The previous condition omitted `version` entirely, leaving
        # `dpkg --compare-versions` without its second version operand.
        condition = (
            '[ "$1" = "configure" ] && dpkg --compare-versions "$2" le-nl '
            + shlex.quote(version)
        )
        return self._append_conditional_snippet(
            "on_upgrade_from",
            "postinst",
            condition,
            run_snippet,
            indent,
            perform_substitution,
        )

    def on_before_removal(
        self,
        run_snippet: str,
        /,
        indent: Optional[bool] = None,
        perform_substitution: bool = True,
    ) -> None:
        """Run the snippet in ``prerm`` when ``$1`` is ``remove``."""
        return self._append_conditional_snippet(
            "on_before_removal",
            "prerm",
            '[ "$1" = "remove" ]',
            run_snippet,
            indent,
            perform_substitution,
        )

    def on_removed(
        self,
        run_snippet: str,
        /,
        indent: Optional[bool] = None,
        perform_substitution: bool = True,
    ) -> None:
        """Run the snippet in ``postrm`` when ``$1`` is ``remove``."""
        return self._append_conditional_snippet(
            "on_removed",
            "postrm",
            '[ "$1" = "remove" ]',
            run_snippet,
            indent,
            perform_substitution,
        )

    def on_purge(
        self,
        run_snippet: str,
        /,
        indent: Optional[bool] = None,
        perform_substitution: bool = True,
    ) -> None:
        """Run the snippet in ``postrm`` when ``$1`` is ``purge``."""
        return self._append_conditional_snippet(
            "on_purge",
            "postrm",
            '[ "$1" = "purge" ]',
            run_snippet,
            indent,
            perform_substitution,
        )

    def unconditionally_in_script(
        self,
        maintscript: Maintscript,
        run_snippet: str,
        /,
        perform_substitution: bool = True,
    ) -> None:
        """Append the snippet to ``maintscript`` without any guarding condition.

        :raises ValueError: If ``maintscript`` is not in ``STD_CONTROL_SCRIPTS``.
        """
        if maintscript not in STD_CONTROL_SCRIPTS:
            raise ValueError(
                f'Unknown script "{maintscript}". Should have been one of:'
                f' {", ".join(sorted(STD_CONTROL_SCRIPTS))}'
            )
        return self._append_script(
            "unconditionally_in_script",
            maintscript,
            run_snippet,
            perform_substitution=perform_substitution,
        )
= triggers + self._substvars = substvars + self._maintscript: Optional[MaintscriptAccessor] = None + self._shlibs_details = shlibs_details + + def _create_maintscript_accessor(self) -> MaintscriptAccessor: + raise NotImplementedError + + def dpkg_trigger(self, trigger_type: DpkgTriggerType, trigger_target: str) -> None: + """Register a declarative dpkg level trigger + + The provided trigger will be added to the package's metadata (the triggers file of the control.tar). + + If the trigger has already been added previously, a second call with the same trigger data will be ignored. + """ + key = (trigger_type, trigger_target) + if key in self._triggers: + return + self._triggers[key] = PluginProvidedTrigger( + dpkg_trigger_type=trigger_type, + dpkg_trigger_target=trigger_target, + provider=self._plugin_metadata, + provider_source_id=self._plugin_source_id, + ) + + @property + def maintscript(self) -> MaintscriptAccessor: + maintscript = self._maintscript + if maintscript is None: + maintscript = self._create_maintscript_accessor() + self._maintscript = maintscript + return maintscript + + @property + def substvars(self) -> FlushableSubstvars: + return self._substvars + + def dpkg_shlibdeps(self, paths: Sequence[VirtualPath]) -> None: + binary_package = self._package_metadata_context.binary_package + with self.substvars.flush() as substvars_file: + dpkg_cmd = ["dpkg-shlibdeps", f"-T{substvars_file}"] + if binary_package.is_udeb: + dpkg_cmd.append("-tudeb") + if binary_package.is_essential: + dpkg_cmd.append("-dPre-Depends") + shlibs_local, shlib_dirs = self._shlibs_details + if shlibs_local is not None: + dpkg_cmd.append(f"-L{shlibs_local}") + if shlib_dirs: + dpkg_cmd.extend(f"-l{sd}" for sd in shlib_dirs) + dpkg_cmd.extend(p.fs_path for p in paths) + print_command(*dpkg_cmd) + try: + subprocess.check_call(dpkg_cmd) + except subprocess.CalledProcessError: + _error( + f"Attempting to auto-detect dependencies via dpkg-shlibdeps for {binary_package.name} failed. 
class BinaryCtrlAccessorProvider(BinaryCtrlAccessorProviderBase):
    """Binary control accessor whose maintscript accessor records snippets.

    Snippets are stored in the supplied ``maintscript_snippets`` containers,
    using ``package_substitution`` for substitution.
    """

    # "_maintscript" is already a slot of BinaryCtrlAccessorProviderBase.
    # Re-declaring it here would create a second, shadowing slot, so only
    # the attributes introduced by this class are listed.
    __slots__ = (
        "_maintscript_snippets",
        "_package_substitution",
    )

    def __init__(
        self,
        plugin_metadata: DebputyPluginMetadata,
        plugin_source_id: str,
        package_metadata_context: PackageProcessingContext,
        triggers: Dict[Tuple[DpkgTriggerType, str], PluginProvidedTrigger],
        substvars: FlushableSubstvars,
        maintscript_snippets: Dict[str, MaintscriptSnippetContainer],
        package_substitution: Substitution,
        shlibs_details: Tuple[Optional[str], Optional[List[str]]],
    ) -> None:
        super().__init__(
            plugin_metadata,
            plugin_source_id,
            package_metadata_context,
            triggers,
            substvars,
            shlibs_details,
        )
        self._maintscript_snippets = maintscript_snippets
        self._package_substitution = package_substitution
        # Built eagerly (as before), but via the factory method so the
        # construction logic lives in one place.
        self._maintscript = self._create_maintscript_accessor()

    def _create_maintscript_accessor(self) -> MaintscriptAccessor:
        """Create the maintscript accessor bound to this plugin and package."""
        return MaintscriptAccessorProvider(
            self._plugin_metadata,
            self._plugin_source_id,
            self._maintscript_snippets,
            self._package_substitution,
        )
def load_plugin_features(
    plugin_search_dirs: Sequence[str],
    substitution: Substitution,
    requested_plugins_only: Optional[Sequence[str]] = None,
    required_plugins: Optional[Set[str]] = None,
    plugin_feature_set: Optional[PluginProvidedFeatureSet] = None,
    debug_mode: bool = False,
) -> PluginProvidedFeatureSet:
    """Load debputy's own plugin plus the required/requested/auto-discovered plugins.

    debputy's bundled plugin is always loaded first, followed by
    ``required_plugins``. If ``requested_plugins_only`` is given, exactly
    those plugins are loaded in addition; otherwise every JSON plugin found
    in the search dirs is auto-loaded as an *optional* plugin.

    A load failure in an optional plugin is downgraded to a warning and the
    plugin is unloaded, unless ``debug_mode`` is set or the unload itself
    fails. Failures in any other plugin always propagate.

    :param plugin_search_dirs: Directories searched for plugin JSON files.
    :param substitution: Substitution instance handed to each plugin initializer.
    :param requested_plugins_only: If not ``None``, disables auto-discovery
      and loads exactly these plugins (besides the required ones).
    :param required_plugins: Plugins that must load successfully.
    :param plugin_feature_set: Feature set to populate; a new one is created
      when omitted.
    :param debug_mode: Make optional plugin failures fatal.
    :return: The populated feature set.
    """
    if plugin_feature_set is None:
        plugin_feature_set = PluginProvidedFeatureSet()
    plugins = [plugin_metadata_for_debputys_own_plugin()]
    # Names of the auto-discovered (optional) plugins; only these may be
    # unloaded again after a failed load.
    unloadable_plugins = set()
    if required_plugins:
        plugins.extend(
            find_json_plugins(
                plugin_search_dirs,
                required_plugins,
            )
        )
    if requested_plugins_only is not None:
        plugins.extend(
            find_json_plugins(
                plugin_search_dirs,
                requested_plugins_only,
            )
        )
    else:
        auto_loaded = _find_all_json_plugins(
            plugin_search_dirs,
            required_plugins if required_plugins is not None else frozenset(),
            debug_mode=debug_mode,
        )
        for plugin_metadata in auto_loaded:
            plugins.append(plugin_metadata)
            unloadable_plugins.add(plugin_metadata.plugin_name)

    for plugin_metadata in plugins:
        api = DebputyPluginInitializerProvider(
            plugin_metadata, plugin_feature_set, substitution
        )
        try:
            api.load_plugin()
        except PluginBaseError as e:
            if plugin_metadata.plugin_name not in unloadable_plugins:
                raise
            if debug_mode:
                # This warning used to sit after the re-raise, where it could
                # never be reached in debug mode.
                _warn(
                    f"The optional plugin {plugin_metadata.plugin_name} failed during load. Re-raising due"
                    f" to --debug/-d."
                )
                raise
            try:
                api.unload_plugin()
            except Exception:
                _warn(
                    f"Failed to load optional {plugin_metadata.plugin_name} and an error was raised when trying to"
                    " clean up after the half-initialized plugin. Re-raising load error as the partially loaded"
                    " module might have tainted the feature set."
                )
                raise e from None
            _warn(
                f"The optional plugin {plugin_metadata.plugin_name} failed during load. The plugin was"
                f" deactivated. Use debug mode (--debug) to show the stacktrace (the warning will become an error)"
            )

    return plugin_feature_set
def find_json_plugins(
    search_dirs: Sequence[str],
    requested_plugins: Iterable[str],
) -> Iterable[DebputyPluginMetadata]:
    """Locate and parse the JSON descriptors for the requested plugins.

    A requested entry containing ``/`` is treated as a path to the JSON file
    and the search dirs are not consulted for it. Any other entry is looked
    up as ``<search_dir>/debputy/plugins/<name>.json`` in each search dir.

    :param search_dirs: Directories to search for name-based requests.
    :param requested_plugins: Plugin names or paths to JSON descriptors.
    :return: A generator yielding the parsed metadata for each descriptor found.
    :raises PluginNotFoundError: If a path is not a file, or a name cannot be
      found in any search dir.
    """
    for plugin_name_or_path in requested_plugins:
        if "/" in plugin_name_or_path:
            if not os.path.isfile(plugin_name_or_path):
                raise PluginNotFoundError(
                    f"Unable to load the plugin {plugin_name_or_path}: The path is not a file."
                    ' (Because the plugin name contains "/", it is assumed to be a path and search path'
                    " is not used.)"
                )
            yield parse_json_plugin_desc(plugin_name_or_path)
            # Move on to the next request. This used to `return`, which
            # silently dropped every plugin requested after a path.
            continue

        found = False
        # NOTE(review): a plugin present in several search dirs is yielded
        # once per dir (unchanged from before) - confirm whether the first
        # match should win instead.
        for search_dir in search_dirs:
            path = os.path.join(
                search_dir, "debputy", "plugins", f"{plugin_name_or_path}.json"
            )
            if not os.path.isfile(path):
                continue
            found = True
            yield parse_json_plugin_desc(path)
        if not found:
            search_dir_str = ":".join(search_dirs)
            raise PluginNotFoundError(
                f"Unable to load the plugin {plugin_name_or_path}: Could not find {plugin_name_or_path}.json in the"
                f" debputy/plugins subdir of any of the search dirs ({search_dir_str})"
            )
def _resolve_module_initializer(
    plugin_name: str,
    plugin_initializer_name: str,
    module_name: Optional[str],
    json_file_path: str,
) -> "PluginInitializationEntryPoint":
    """Locate the Python module of a plugin and return its initializer attribute.

    When ``module_name`` is ``None``, the module name and a candidate file path
    are derived via ``_find_plugin_implementation_file``; if that file exists it
    is loaded directly from disk. Otherwise (or when ``module_name`` was given)
    the module is imported through the regular import machinery.

    :param plugin_name: Name of the plugin (used for guessing the module and in
      error messages).
    :param plugin_initializer_name: Name of the attribute inside the module that
      serves as the plugin initializer.
    :param module_name: Explicit module name from the plugin metadata, or ``None``
      to have it guessed from the plugin name.
    :param json_file_path: Path of the JSON plugin description (used for locating
      the sibling ``.py`` file and in error messages).
    :return: The attribute named ``plugin_initializer_name`` of the resolved module.
    :raises PluginInitializationError: If the module file could not be loaded, or
      the guessed module was found neither on disk nor via the import machinery.
    :raises PluginMetadataError: If an explicitly requested module cannot be
      imported, or the module lacks the requested initializer attribute.
    """
    module = None
    module_fs_path = None
    if module_name is None:
        module_name, module_fs_path = _find_plugin_implementation_file(
            plugin_name, json_file_path
        )
        if os.path.isfile(module_fs_path):
            spec = importlib.util.spec_from_file_location(module_name, module_fs_path)
            if spec is None:
                raise PluginInitializationError(
                    f"Failed to load {plugin_name} (path: {module_fs_path})."
                    " The spec_from_file_location function returned None."
                )
            mod = importlib.util.module_from_spec(spec)
            loader = spec.loader
            if loader is None:
                raise PluginInitializationError(
                    f"Failed to load {plugin_name} (path: {module_fs_path})."
                    " Python could not find a suitable loader (spec.loader was None)"
                )
            # Register the module before executing it, so it is visible in
            # sys.modules while its own top-level code runs.
            sys.modules[module_name] = mod
            try:
                loader.exec_module(mod)
            except (Exception, GeneratorExit) as e:
                raise PluginInitializationError(
                    f"Failed to load {plugin_name} (path: {module_fs_path})."
                    " The module threw an exception while being loaded."
                ) from e
            module = mod

    if module is None:
        try:
            module = importlib.import_module(module_name)
        except ModuleNotFoundError as e:
            if module_fs_path is None:
                # The module name came from the plugin metadata itself.
                raise PluginMetadataError(
                    f'The plugin defined in "{json_file_path}" wanted to load the module "{module_name}", but'
                    " this module is not available in the python search path"
                ) from e
            raise PluginInitializationError(
                f"Failed to load {plugin_name}. Tried loading it from"
                f' "{module_fs_path}" (which did not exist) and PYTHONPATH as'
                f" {module_name} (where it was not found either). Please ensure"
                " the module code is installed in the correct spot or provide an"
                f' explicit "module" definition in {json_file_path}.'
            ) from e

    # Use a default so that a missing attribute reaches the descriptive error
    # below rather than escaping as a bare AttributeError.
    plugin_initializer = getattr(module, plugin_initializer_name, None)

    if plugin_initializer is None:
        raise PluginMetadataError(
            f'The plugin defined in {json_file_path} claimed that module "{module_name}" would have an'
            f" attribute called {plugin_initializer_name}. However, it does not. Please correct the plugin"
            " metadata or initializer name in the Python module."
        )
    return cast("PluginInitializationEntryPoint", plugin_initializer)
+ ) + + plugin_initializers = [] + + if plugin_initializer_name is not None: + plugin_initializer = _resolve_module_initializer( + plugin_name, + plugin_initializer_name, + module_name, + json_file_path, + ) + plugin_initializers.append(plugin_initializer) + + if known_packaging_files_raw: + kpf_root_path = attribute_path["known_packaging_files"] + known_packaging_files = [] + for k, v in enumerate(known_packaging_files_raw): + kpf_path = kpf_root_path[k] + p = v.get("path") + if isinstance(p, str): + kpf_path.path_hint = p + if plugin_name.startswith("debputy-") and isinstance(v, dict): + docs = v.get("documentation-uris") + if docs is not None and isinstance(docs, list): + docs = [ + ( + d.replace("@DEBPUTY_DOC_ROOT_DIR@", DEBPUTY_DOC_ROOT_DIR) + if isinstance(d, str) + else d + ) + for d in docs + ] + v["documentation-uris"] = docs + known_packaging_file: KnownPackagingFileInfo = ( + PLUGIN_KNOWN_PACKAGING_FILES_PARSER.parse_input( + v, + kpf_path, + ) + ) + known_packaging_files.append((kpf_path, known_packaging_file)) + + def _initialize_json_provided_known_packaging_files( + api: DebputyPluginInitializerProvider, + ) -> None: + for p, details in known_packaging_files: + try: + api.known_packaging_files(details) + except ValueError as ex: + raise PluginMetadataError( + f"Error while processing {p.path} defined in {json_file_path}: {ex.args[0]}" + ) + + plugin_initializers.append(_initialize_json_provided_known_packaging_files) + + if manifest_variables_raw: + manifest_var_path = attribute_path["manifest_variables"] + manifest_variables = [ + PLUGIN_MANIFEST_VARS_PARSER.parse_input(p, manifest_var_path[i]) + for i, p in enumerate(manifest_variables_raw) + ] + + def _initialize_json_provided_manifest_vars( + api: DebputyPluginInitializer, + ) -> None: + for idx, manifest_variable in enumerate(manifest_variables): + name = manifest_variable["name"] + value = manifest_variable["value"] + doc = manifest_variable.get("reference_documentation") + try: + 
api.manifest_variable( + name, value, variable_reference_documentation=doc + ) + except ValueError as ex: + var_path = manifest_var_path[idx] + raise PluginMetadataError( + f"Error while processing {var_path.path} defined in {json_file_path}: {ex.args[0]}" + ) + + plugin_initializers.append(_initialize_json_provided_manifest_vars) + + if packager_provided_files_raw: + ppf_path = attribute_path["packager_provided_files"] + ppfs = [ + PLUGIN_PPF_PARSER.parse_input(p, ppf_path[i]) + for i, p in enumerate(packager_provided_files_raw) + ] + + def _initialize_json_provided_ppfs(api: DebputyPluginInitializer) -> None: + ppf: PackagerProvidedFileJsonDescription + for idx, ppf in enumerate(ppfs): + c = dict(ppf) + stem = ppf["stem"] + installed_path = ppf["installed_path"] + default_mode = ppf.get("default_mode") + ref_doc_dict = ppf.get("reference_documentation") + if default_mode is not None: + c["default_mode"] = default_mode.octal_mode + + if ref_doc_dict is not None: + ref_doc = packager_provided_file_reference_documentation( + **ref_doc_dict + ) + else: + ref_doc = None + + for k in [ + "stem", + "installed_path", + "reference_documentation", + ]: + try: + del c[k] + except KeyError: + pass + + try: + api.packager_provided_file(stem, installed_path, reference_documentation=ref_doc, **c) # type: ignore + except ValueError as ex: + p_path = ppf_path[idx] + raise PluginMetadataError( + f"Error while processing {p_path.path} defined in {json_file_path}: {ex.args[0]}" + ) + + plugin_initializers.append(_initialize_json_provided_ppfs) + + if not plugin_initializers: + raise PluginMetadataError( + f"The plugin defined in {json_file_path} does not seem to provide features, " + f" such as module + plugin-initializer or packager-provided-files." 
@contextlib.contextmanager
def _open(path: str, fd: Optional[IO[bytes]] = None) -> Iterator[IO[bytes]]:
    """Yield a binary file handle for ``path``, preferring a caller-supplied one.

    :param path: File to open in binary read mode when no handle is supplied.
    :param fd: An already open handle. When given, it is yielded as-is and is
      not closed by this context manager.
    :return: A context manager producing the handle to read from.
    """
    if fd is None:
        # We opened the file ourselves, so it is closed again on exit.
        with open(path, "rb") as opened:
            yield opened
        return
    yield fd
class ServiceRegistryImpl(ServiceRegistry[DSD]):
    """Collects the service definitions registered on behalf of one service manager."""

    __slots__ = ("_service_manager_details", "_service_definitions")

    def __init__(self, service_manager_details: ServiceManagerDetails) -> None:
        self._service_definitions: List[ServiceDefinition[DSD]] = []
        self._service_manager_details = service_manager_details

    @property
    def detected_services(self) -> Sequence[ServiceDefinition[DSD]]:
        """The definitions registered so far, in registration order."""
        return self._service_definitions

    def register_service(
        self,
        path: VirtualPath,
        name: Union[str, List[str]],
        *,
        type_of_service: str = "service",  # "timer", etc.
        service_scope: str = "system",
        enable_by_default: bool = True,
        start_by_default: bool = True,
        default_upgrade_rule: ServiceUpgradeRule = "restart",
        service_context: Optional[DSD] = None,
    ) -> None:
        """Record a service definition for ``path``.

        :param path: The path of the service file.
        :param name: A single name or a list of names for the service. The first
          name becomes the primary name of the definition.
        :raises ValueError: If ``name`` is an empty list.
        """
        if isinstance(name, list):
            all_names = name
        else:
            all_names = [name]
        if not all_names:
            raise ValueError(
                f"The service must have at least one name - {path.absolute} did not have any"
            )
        origin = f"Auto-detected by plugin {self._service_manager_details.plugin_metadata.plugin_name}"
        # TODO: We cannot create a service definition immediately once the manifest is involved
        definition = ServiceDefinitionImpl(
            all_names[0],
            all_names,
            path,
            type_of_service,
            service_scope,
            enable_by_default,
            start_by_default,
            default_upgrade_rule,
            origin,
            True,
            service_context,
        )
        self._service_definitions.append(definition)
BinaryPackage +from debputy.plugin.api import ( + VirtualPath, + BinaryCtrlAccessor, + PackageProcessingContext, +) +from debputy.plugin.api.spec import ( + DebputyPluginInitializer, + MetadataAutoDetector, + DpkgTriggerType, + ParserDocumentation, + PackageProcessor, + PathDef, + ParserAttributeDocumentation, + undocumented_attr, + documented_attr, + reference_documentation, + PackagerProvidedFileReferenceDocumentation, + TypeMappingDocumentation, +) +from debputy.substitution import VariableContext +from debputy.transformation_rules import TransformationRule +from debputy.util import _normalize_path, package_cross_check_precheck + +if TYPE_CHECKING: + from debputy.plugin.api.spec import ( + ServiceDetector, + ServiceIntegrator, + PackageTypeSelector, + ) + from debputy.manifest_parser.parser_data import ParserContextData + from debputy.highlevel_manifest import ( + HighLevelManifest, + PackageTransformationDefinition, + BinaryPackageData, + ) + + +_PACKAGE_TYPE_DEB_ONLY = frozenset(["deb"]) +_ALL_PACKAGE_TYPES = frozenset(["deb", "udeb"]) + + +TD = TypeVar("TD", bound="DebputyParsedContent") +PF = TypeVar("PF") +SF = TypeVar("SF") +TP = TypeVar("TP") +TTP = Type[TP] + +DIPKWHandler = Callable[[str, AttributePath, "ParserContextData"], TP] +DIPHandler = Callable[[str, PF, AttributePath, "ParserContextData"], TP] + + +def resolve_package_type_selectors( + package_type: "PackageTypeSelector", +) -> FrozenSet[str]: + if package_type is _ALL_PACKAGE_TYPES or package_type is _PACKAGE_TYPE_DEB_ONLY: + return cast("FrozenSet[str]", package_type) + if isinstance(package_type, str): + return ( + _PACKAGE_TYPE_DEB_ONLY + if package_type == "deb" + else frozenset([package_type]) + ) + else: + return frozenset(package_type) + + +@dataclasses.dataclass(slots=True) +class DebputyPluginMetadata: + plugin_name: str + api_compat_version: int + plugin_loader: Optional[Callable[[], Callable[["DebputyPluginInitializer"], None]]] + plugin_initializer: 
Optional[Callable[["DebputyPluginInitializer"], None]] + plugin_path: str + _is_initialized: bool = False + + @property + def is_loaded(self) -> bool: + return self.plugin_initializer is not None + + @property + def is_initialized(self) -> bool: + return self._is_initialized + + def initialize_plugin(self, api: "DebputyPluginInitializer") -> None: + if self.is_initialized: + raise RuntimeError("Cannot load plugins twice") + if not self.is_loaded: + self.load_plugin() + plugin_initializer = self.plugin_initializer + assert plugin_initializer is not None + plugin_initializer(api) + self._is_initialized = True + + def load_plugin(self) -> None: + plugin_loader = self.plugin_loader + assert plugin_loader is not None + self.plugin_initializer = plugin_loader() + assert self.plugin_initializer is not None + + +@dataclasses.dataclass(slots=True, frozen=True) +class PluginProvidedParser(Generic[PF, TP]): + parser: "DeclarativeInputParser[PF]" + handler: Callable[[str, PF, "AttributePath", "ParserContextData"], TP] + plugin_metadata: DebputyPluginMetadata + + def parse( + self, + name: str, + value: object, + attribute_path: "AttributePath", + *, + parser_context: "ParserContextData", + ) -> TP: + parsed_value = self.parser.parse_input( + value, attribute_path, parser_context=parser_context + ) + return self.handler(name, parsed_value, attribute_path, parser_context) + + +class PPFFormatParam(TypedDict): + priority: Optional[int] + name: str + owning_package: str + + +@dataclasses.dataclass(slots=True, frozen=True) +class PackagerProvidedFileClassSpec: + debputy_plugin_metadata: DebputyPluginMetadata + stem: str + installed_as_format: str + default_mode: int + default_priority: Optional[int] + allow_name_segment: bool + allow_architecture_segment: bool + post_formatting_rewrite: Optional[Callable[[str], str]] + packageless_is_fallback_for_all_packages: bool + reservation_only: bool + formatting_callback: Optional[Callable[[str, PPFFormatParam, VirtualPath], str]] = ( + None 
@dataclasses.dataclass(slots=True)
class MetadataOrMaintscriptDetector:
    """A plugin-provided metadata detector together with the package types it covers."""

    plugin_metadata: DebputyPluginMetadata
    detector_id: str
    detector: MetadataAutoDetector
    applies_to_package_types: FrozenSet[str]
    enabled: bool = True

    def applies_to(self, binary_package: BinaryPackage) -> bool:
        """Return True when the package type of ``binary_package`` is covered."""
        package_type = binary_package.package_type
        return package_type in self.applies_to_package_types

    def run_detector(
        self,
        fs_root: "VirtualPath",
        ctrl: "BinaryCtrlAccessor",
        context: "PackageProcessingContext",
    ) -> None:
        """Invoke the detector and translate selected failures into plugin errors.

        :raises PluginAPIViolationError: If the detector triggered a
          ``DebputyFSIsROError``.
        :raises UnhandledOrUnexpectedErrorFromPluginError: If the detector raised
          ``ChildProcessError``, ``RuntimeError`` or ``AttributeError``.
        """
        try:
            self.detector(fs_root, ctrl, context)
        except DebputyFSIsROError as err:
            # Handled first, ahead of the broader handler below.
            plugin_name = self.plugin_metadata.plugin_name
            message = (
                f'The plugin {plugin_name} violated the API contract for "metadata detectors"'
                " by attempting to mutate the provided file system in its metadata detector"
                f" with id {self.detector_id}. File system mutation is *not* supported at"
                " this stage (file system layout is committed and the attempted changes"
                " would be lost)."
            )
            raise PluginAPIViolationError(message) from err
        except (ChildProcessError, RuntimeError, AttributeError) as err:
            plugin_name = f"{self.plugin_metadata.plugin_name}"
            message = (
                f"The plugin {plugin_name} threw an unhandled or unexpected exception from its metadata"
                f" detector with id {self.detector_id}."
            )
            raise UnhandledOrUnexpectedErrorFromPluginError(message) from err
plugin_metadata: DebputyPluginMetadata, + *, + inline_reference_documentation: Optional[ParserDocumentation] = None, + ) -> None: + reference_documentation_url = None + if inline_reference_documentation: + if inline_reference_documentation.attribute_doc: + raise ValueError( + "Cannot provide per-attribute documentation for a value-less keyword!" + ) + if inline_reference_documentation.alt_parser_description: + raise ValueError( + "Cannot provide non-mapping-format documentation for a value-less keyword!" + ) + reference_documentation_url = ( + inline_reference_documentation.documentation_reference_url + ) + parser = DeclarativeValuelessKeywordInputParser( + inline_reference_documentation, + documentation_reference=reference_documentation_url, + ) + + def _combined_handler( + name: str, + _ignored: Any, + attr_path: AttributePath, + context: "ParserContextData", + ) -> TP: + return handler(name, attr_path, context) + + p = PluginProvidedParser( + parser, + _combined_handler, + plugin_metadata, + ) + + self._add_parser(keyword, p) + + def register_parser( + self, + keyword: Union[str, List[str]], + parser: "DeclarativeInputParser[PF]", + handler: Callable[[str, PF, "AttributePath", "ParserContextData"], TP], + plugin_metadata: DebputyPluginMetadata, + ) -> None: + p = PluginProvidedParser( + parser, + handler, + plugin_metadata, + ) + self._add_parser(keyword, p) + + def _add_parser( + self, + keyword: Union[str, List[str]], + ppp: "PluginProvidedParser[PF, TP]", + ) -> None: + ks = [keyword] if isinstance(keyword, str) else keyword + for k in ks: + existing_parser = self._parsers.get(k) + if existing_parser is not None: + message = ( + f'The rule name "{k}" is already taken by the plugin' + f" {existing_parser.plugin_metadata.plugin_name}. This conflict was triggered" + f" when plugin {ppp.plugin_metadata.plugin_name} attempted to register its parser." 
class DispatchingObjectParser(
    DispatchingParserBase[Mapping[str, Any]],
    DeclarativeInputParser[Mapping[str, Any]],
):
    """Parser for a mapping whose keys are each handled by a registered parser.

    Every key of the input mapping must have been registered (via the
    registration methods of the base class or `register_child_parser`). The
    values are parsed in registration order and collected into a result mapping.
    """

    def __init__(
        self,
        manifest_attribute_path_template: str,
        *,
        parser_documentation: Optional[ParserDocumentation] = None,
    ) -> None:
        super().__init__(manifest_attribute_path_template)
        # One entry per registered keyword; filled in by `_new_parser`.
        self._attribute_documentation: List[ParserAttributeDocumentation] = []
        if parser_documentation is None:
            parser_documentation = reference_documentation()
        self._parser_documentation = parser_documentation

    @property
    def reference_documentation_url(self) -> Optional[str]:
        return self._parser_documentation.documentation_reference_url

    @property
    def inline_reference_documentation(self) -> Optional[ParserDocumentation]:
        """Documentation of this parser, including the attributes registered so far."""
        ref_doc = self._parser_documentation
        return reference_documentation(
            title=ref_doc.title,
            description=ref_doc.description,
            attributes=self._attribute_documentation,
            reference_documentation_url=self.reference_documentation_url,
        )

    def _new_parser(self, keyword: str, ppp: "PluginProvidedParser[PF, TP]") -> None:
        super()._new_parser(keyword, ppp)
        # Track documentation for the keyword so it shows up in
        # `inline_reference_documentation`.
        doc = ppp.parser.inline_reference_documentation
        if doc is None or doc.description is None:
            self._attribute_documentation.append(undocumented_attr(keyword))
        else:
            self._attribute_documentation.append(
                documented_attr(keyword, doc.description)
            )

    def register_child_parser(
        self,
        keyword: str,
        parser: "DispatchingObjectParser",
        plugin_metadata: DebputyPluginMetadata,
        *,
        on_end_parse_step: Optional[
            Callable[
                [str, Optional[Mapping[str, Any]], AttributePath, "ParserContextData"],
                None,
            ]
        ] = None,
    ) -> None:
        """Register a nested object parser under ``keyword``.

        :param keyword: The key in the mapping that the child parser handles.
        :param parser: The parser used for the value of ``keyword``.
        :param plugin_metadata: The plugin registering the child parser.
        :param on_end_parse_step: Optional callback invoked with the parsed value
          of the child. When omitted, the parsed value is passed through as-is.
        """

        def _handler(
            name: str,
            value: Mapping[str, Any],
            path: AttributePath,
            parser_context: "ParserContextData",
        ) -> Mapping[str, Any]:
            # The callback is optional; calling None here would raise TypeError.
            if on_end_parse_step is not None:
                on_end_parse_step(name, value, path, parser_context)
            return value

        p = PluginProvidedParser(
            parser,
            _handler,
            plugin_metadata,
        )
        self._add_parser(keyword, p)

    # FIXME: Agree on naming (parse vs. parse_input)
    def parse_input(
        self,
        value: object,
        path: "AttributePath",
        *,
        parser_context: Optional["ParserContextData"] = None,
    ) -> TD:
        return self.parse(value, path, parser_context=parser_context)

    def parse(
        self,
        orig_value: object,
        attribute_path: "AttributePath",
        *,
        parser_context: "ParserContextData",
    ) -> TP:
        """Parse ``orig_value`` by dispatching each key to its registered parser.

        :param orig_value: The raw value; must be a non-empty ``dict``.
        :param attribute_path: Location of ``orig_value`` (used in error messages
          and for deriving the paths of the individual keys).
        :param parser_context: Context passed on to the registered parsers.
        :return: A mapping from each present key to its parsed value.
        :raises ManifestParseException: If the value is not a non-empty mapping
          or contains a key without a registered parser.
        """
        doc_ref = ""
        if self.reference_documentation_url is not None:
            doc_ref = (
                f" Please see {self.reference_documentation_url} for the documentation."
            )
        if not isinstance(orig_value, dict) or not orig_value:
            raise ManifestParseException(
                f"The attribute {attribute_path.path} must be a non-empty mapping.{doc_ref}"
            )
        result = {}
        unknown_keys = orig_value.keys() - self._parsers.keys()
        if unknown_keys:
            first_key = next(iter(unknown_keys))
            remaining_valid_attributes = self._parsers.keys() - orig_value.keys()
            if not remaining_valid_attributes:
                raise ManifestParseException(
                    f'The attribute "{first_key}" is not applicable at {attribute_path.path} (with the'
                    f" current set of plugins).{doc_ref}"
                )
            # Sorted so that the message is stable between runs.
            remaining_valid_attribute_names = ", ".join(
                sorted(remaining_valid_attributes)
            )
            raise ManifestParseException(
                f'The attribute "{first_key}" is not applicable at {attribute_path.path} (with the current set'
                " of plugins). Possible attributes available (and not already used) are:"
                f" {remaining_valid_attribute_names}.{doc_ref}"
            )
        # Parse order is important for the root level (currently we use rule registration order)
        for key, provided_parser in self._parsers.items():
            value = orig_value.get(key)
            if value is None:
                # Absent child objects still get their handler invoked (with an
                # empty mapping); other absent keys are simply skipped.
                if isinstance(provided_parser.parser, DispatchingObjectParser):
                    provided_parser.handler(
                        key, {}, attribute_path[key], parser_context
                    )
                continue
            value_path = attribute_path[key]
            result[key] = provided_parser.parse(
                key, value, value_path, parser_context=parser_context
            )
        return result
@dataclasses.dataclass(slots=True)
class DeclarativeValuelessKeywordInputParser(DeclarativeInputParser[None]):
    """Parser for keywords that carry no value: only ``None`` is accepted."""

    inline_reference_documentation: Optional[ParserDocumentation] = None
    documentation_reference: Optional[str] = None

    def parse_input(
        self,
        value: object,
        path: "AttributePath",
        *,
        parser_context: Optional["ParserContextData"] = None,
    ) -> TD:
        """Return ``value`` unchanged when it is ``None``; reject anything else.

        :raises ManifestParseException: If ``value`` is not ``None``.
        """
        if value is not None:
            reference = self.documentation_reference
            doc_ref = f" (Documentation: {reference})" if reference is not None else ""
            raise ManifestParseException(
                f"Expected attribute {path.path} to be a string.{doc_ref}"
            )
        return cast("TD", value)
"""\ + Inside the manifest, the `packages` mapping can be used to define requests for the binary packages + you want `debputy` to produce. Each key inside `packages` must be the name of a binary package + defined in `debian/control`. The value is a dictionary defining which features that `debputy` + should apply to that binary package. An example could be: + + packages: + foo: + transformations: + - create-symlink: + path: usr/share/foo/my-first-symlink + target: /usr/share/bar/symlink-target + - create-symlink: + path: usr/lib/{{DEB_HOST_MULTIARCH}}/my-second-symlink + target: /usr/lib/{{DEB_HOST_MULTIARCH}}/baz/symlink-target + bar: + transformations: + - create-directories: + - some/empty/directory.d + - another/empty/integration-point.d + - create-directories: + path: a/third-empty/directory.d + owner: www-data + group: www-data + + In this case, `debputy` will create some symlinks inside the `foo` package and some directories for + the `bar` package. The following subsections define the keys you can use under each binary package. 
+ """ + ), + reference_documentation_url=f"{DEBPUTY_DOC_ROOT_DIR}/MANIFEST-FORMAT.md#binary-package-rules", + ), +} + + +@dataclasses.dataclass(slots=True) +class PluginProvidedManifestVariable: + plugin_metadata: DebputyPluginMetadata + variable_name: str + variable_value: Optional[Union[str, Callable[[VariableContext], str]]] + is_context_specific_variable: bool + variable_reference_documentation: Optional[str] = None + is_documentation_placeholder: bool = False + is_for_special_case: bool = False + + @property + def is_internal(self) -> bool: + return self.variable_name.startswith("_") or ":_" in self.variable_name + + @property + def is_token(self) -> bool: + return self.variable_name.startswith("token:") + + def resolve(self, variable_context: VariableContext) -> str: + value_resolver = self.variable_value + if isinstance(value_resolver, str): + res = value_resolver + else: + res = value_resolver(variable_context) + return res + + +@dataclasses.dataclass(slots=True, frozen=True) +class AutomaticDiscardRuleExample: + content: Sequence[Tuple[PathDef, bool]] + description: Optional[str] = None + + +def automatic_discard_rule_example( + *content: Union[str, PathDef, Tuple[Union[str, PathDef], bool]], + example_description: Optional[str] = None, +) -> AutomaticDiscardRuleExample: + """Provide an example for an automatic discard rule + + The return value of this method should be passed to the `examples` parameter of + `automatic_discard_rule` method - either directly for a single example or as a + part of a sequence of examples. + + >>> # Possible example for an exclude rule for ".la" files + >>> # Example shows two files; The ".la" file that will be removed and another file that + >>> # will be kept. + >>> automatic_discard_rule_example( # doctest: +ELLIPSIS + ... "usr/lib/libfoo.la", + ... ("usr/lib/libfoo.so.1.0.0", False), + ... ) + AutomaticDiscardRuleExample(...) 
+ + Keep in mind that you have to explicitly include directories that are relevant for the test + if you want them shown. Also, if a directory is excluded, all path beneath it will be + automatically excluded in the example as well. Your example data must account for that. + + >>> # Possible example for python cache file discard rule + >>> # In this example, we explicitly list the __pycache__ directory itself because we + >>> # want it shown in the output (otherwise, we could have omitted it) + >>> automatic_discard_rule_example( # doctest: +ELLIPSIS + ... (".../foo.py", False), + ... ".../__pycache__/", + ... ".../__pycache__/...", + ... ".../foo.pyc", + ... ".../foo.pyo", + ... ) + AutomaticDiscardRuleExample(...) + + Note: Even if `__pycache__` had been implicit, the result would have been the same. However, + the rendered example would not have shown the directory on its own. The use of `...` as + path names is useful for denoting "anywhere" or "anything". Though, there is nothing "magic" + about this name - it happens to be allowed as a path name (unlike `.` or `..`). + + These examples can be seen via `debputy plugin show automatic-discard-rules <name-here>`. + + :param content: The content of the example. Each element can be either a path definition or + a tuple of a path definition followed by a verdict (boolean). Each provided path definition + describes the paths to be presented in the example. Implicit paths such as parent + directories will be created but not shown in the example. Therefore, if a directory is + relevant to the example, be sure to explicitly list it. + + The verdict associated with a path determines whether the path should be discarded (when + True) or kept (when False). When a path is not explicitly associated with a verdict, the + verdict is assumed to be discarded (True). + :param example_description: An optional description displayed together with the example. + :return: An opaque data structure containing the example. 
+ """ + example = [] + for d in content: + if not isinstance(d, tuple): + pd = d + verdict = True + else: + pd, verdict = d + + path_def = as_path_def(pd) + example.append((path_def, verdict)) + + if not example: + raise ValueError("At least one path must be given for an example") + + return AutomaticDiscardRuleExample( + tuple(example), + description=example_description, + ) + + +@dataclasses.dataclass(slots=True, frozen=True) +class PluginProvidedPackageProcessor: + processor_id: str + applies_to_package_types: FrozenSet[str] + package_processor: PackageProcessor + dependencies: FrozenSet[Tuple[str, str]] + plugin_metadata: DebputyPluginMetadata + + def applies_to(self, binary_package: BinaryPackage) -> bool: + return binary_package.package_type in self.applies_to_package_types + + @property + def dependency_id(self) -> Tuple[str, str]: + return self.plugin_metadata.plugin_name, self.processor_id + + def run_package_processor( + self, + fs_root: "VirtualPath", + unused: None, + context: "PackageProcessingContext", + ) -> None: + self.package_processor(fs_root, unused, context) + + +@dataclasses.dataclass(slots=True, frozen=True) +class PluginProvidedDiscardRule: + name: str + plugin_metadata: DebputyPluginMetadata + discard_check: Callable[[VirtualPath], bool] + reference_documentation: Optional[str] + examples: Sequence[AutomaticDiscardRuleExample] = tuple() + + def should_discard(self, path: VirtualPath) -> bool: + return self.discard_check(path) + + +@dataclasses.dataclass(slots=True, frozen=True) +class ServiceManagerDetails: + service_manager: str + service_detector: "ServiceDetector" + service_integrator: "ServiceIntegrator" + plugin_metadata: DebputyPluginMetadata + + +ReferenceValue = TypedDict( + "ReferenceValue", + { + "description": str, + }, +) + + +def _reference_data_value( + *, + description: str, +) -> ReferenceValue: + return { + "description": description, + } + + +KnownPackagingFileCategories = Literal[ + "generated", + "generic-template", + 
"ppf-file", + "ppf-control-file", + "maint-config", + "pkg-metadata", + "pkg-helper-config", + "testing", + "lint-config", +] +KNOWN_PACKAGING_FILE_CATEGORY_DESCRIPTIONS: Mapping[ + KnownPackagingFileCategories, ReferenceValue +] = { + "generated": _reference_data_value( + description="The file is (likely) generated from another file" + ), + "generic-template": _reference_data_value( + description="The file is (likely) a generic template that generates a known packaging file. While the" + " file is annotated as if it was the target file, the file might uses a custom template" + " language inside it." + ), + "ppf-file": _reference_data_value( + description="Packager provided file to be installed on the file system - usually as-is." + " When `install-pattern` or `install-path` are provided, this is where the file is installed." + ), + "ppf-control-file": _reference_data_value( + description="Packager provided file that becomes a control file - possible after processing. " + " If `install-pattern` or `install-path` are provided, they denote where the is placed" + " (generally, this will be of the form `DEBIAN/<name>`)" + ), + "maint-config": _reference_data_value( + description="Maintenance configuration for a specific tool that the maintainer uses (tool / style preferences)" + ), + "pkg-metadata": _reference_data_value( + description="The file is related to standard package metadata (usually documented in Debian Policy)" + ), + "pkg-helper-config": _reference_data_value( + description="The file is packaging helper configuration or instruction file" + ), + "testing": _reference_data_value( + description="The file is related to automated testing (autopkgtests, salsa/gitlab CI)." 
+ ), + "lint-config": _reference_data_value( + description="The file is related to a linter (such as overrides for false-positives or style preferences)" + ), +} + +KnownPackagingConfigFeature = Literal[ + "dh-filearray", + "dh-filedoublearray", + "dh-hash-subst", + "dh-dollar-subst", + "dh-glob", + "dh-partial-glob", + "dh-late-glob", + "dh-glob-after-execute", + "dh-executable-config", + "dh-custom-format", + "dh-file-list", + "dh-install-list", + "dh-install-list-dest-dir-like-dh_install", + "dh-install-list-fixed-dest-dir", + "dh-fixed-dest-dir", + "dh-exec-rename", + "dh-docs-only", +] + +KNOWN_PACKAGING_FILE_CONFIG_FEATURE_DESCRIPTION: Mapping[ + KnownPackagingConfigFeature, ReferenceValue +] = { + "dh-filearray": _reference_data_value( + description="The file will be read as a list of space/newline separated tokens", + ), + "dh-filedoublearray": _reference_data_value( + description="Each line in the file will be read as a list of space-separated tokens", + ), + "dh-hash-subst": _reference_data_value( + description="Supports debhelper #PACKAGE# style substitutions (udebs often excluded)", + ), + "dh-dollar-subst": _reference_data_value( + description="Supports debhelper ${PACKAGE} style substitutions (usually requires compat 13+)", + ), + "dh-glob": _reference_data_value( + description="Supports standard debhelper globing", + ), + "dh-partial-glob": _reference_data_value( + description="Supports standard debhelper globing but only to a subset of the values (implies dh-late-glob)", + ), + "dh-late-glob": _reference_data_value( + description="Globbing is done separately instead of using the built-in function", + ), + "dh-glob-after-execute": _reference_data_value( + description="When the dh config file is executable, the generated output will be subject to globbing", + ), + "dh-executable-config": _reference_data_value( + description="If marked executable, debhelper will execute the file and read its output", + ), + "dh-custom-format": _reference_data_value( + 
description="The dh tool will or may have a custom parser for this file", + ), + "dh-file-list": _reference_data_value( + description="The dh file contains a list of paths to be processed", + ), + "dh-install-list": _reference_data_value( + description="The dh file contains a list of paths/globs to be installed but the tool specific knowledge" + " required to understand the file cannot be conveyed via this interface.", + ), + "dh-install-list-dest-dir-like-dh_install": _reference_data_value( + description="The dh file is processed similar to dh_install (notably dest-dir handling derived" + " from the path or the last token on the line)", + ), + "dh-install-list-fixed-dest-dir": _reference_data_value( + description="The dh file is an install list and the dest-dir is always the same for all patterns" + " (when `install-pattern` or `install-path` are provided, they identify the directory - not the file location)", + ), + "dh-exec-rename": _reference_data_value( + description="When `dh-exec` is the interpreter of this dh config file, its renaming (=>) feature can be" + " requested/used", + ), + "dh-docs-only": _reference_data_value( + description="The dh config file is used for documentation only. 
Implicit <!nodocs> Build-Profiles support", + ), +} + +CONFIG_FEATURE_ALIASES: Dict[ + KnownPackagingConfigFeature, List[Tuple[KnownPackagingConfigFeature, int]] +] = { + "dh-filearray": [ + ("dh-filearray", 0), + ("dh-executable-config", 9), + ("dh-dollar-subst", 13), + ], + "dh-filedoublearray": [ + ("dh-filedoublearray", 0), + ("dh-executable-config", 9), + ("dh-dollar-subst", 13), + ], +} + + +def _implies( + features: List[KnownPackagingConfigFeature], + seen: Set[KnownPackagingConfigFeature], + implying: Sequence[KnownPackagingConfigFeature], + implied: KnownPackagingConfigFeature, +) -> None: + if implied in seen: + return + if all(f in seen for f in implying): + seen.add(implied) + features.append(implied) + + +def expand_known_packaging_config_features( + compat_level: int, + features: List[KnownPackagingConfigFeature], +) -> List[KnownPackagingConfigFeature]: + final_features: List[KnownPackagingConfigFeature] = [] + seen = set() + for feature in features: + expanded = CONFIG_FEATURE_ALIASES.get(feature) + if not expanded: + expanded = [(feature, 0)] + for v, c in expanded: + if compat_level < c or v in seen: + continue + seen.add(v) + final_features.append(v) + if "dh-glob" in seen and "dh-late-glob" in seen: + final_features.remove("dh-glob") + + _implies(final_features, seen, ["dh-partial-glob"], "dh-late-glob") + _implies( + final_features, + seen, + ["dh-late-glob", "dh-executable-config"], + "dh-glob-after-execute", + ) + return sorted(final_features) + + +class InstallPatternDHCompatRule(DebputyParsedContent): + install_pattern: NotRequired[str] + add_config_features: NotRequired[List[KnownPackagingConfigFeature]] + starting_with_compat_level: NotRequired[int] + + +class KnownPackagingFileInfo(DebputyParsedContent): + # Exposed directly in the JSON plugin parsing; be careful with changes + path: NotRequired[str] + pkgfile: NotRequired[str] + detection_method: NotRequired[Literal["path", "dh.pkgfile"]] + file_categories: 
NotRequired[List[KnownPackagingFileCategories]] + documentation_uris: NotRequired[List[str]] + debputy_cmd_templates: NotRequired[List[List[str]]] + debhelper_commands: NotRequired[List[str]] + config_features: NotRequired[List[KnownPackagingConfigFeature]] + install_pattern: NotRequired[str] + dh_compat_rules: NotRequired[List[InstallPatternDHCompatRule]] + default_priority: NotRequired[int] + post_formatting_rewrite: NotRequired[Literal["period-to-underscore"]] + packageless_is_fallback_for_all_packages: NotRequired[bool] + + +@dataclasses.dataclass(slots=True) +class PluginProvidedKnownPackagingFile: + info: KnownPackagingFileInfo + detection_method: Literal["path", "dh.pkgfile"] + detection_value: str + plugin_metadata: DebputyPluginMetadata + + +@dataclasses.dataclass(slots=True, frozen=True) +class PluginProvidedTypeMapping: + mapped_type: TypeMapping[Any, Any] + reference_documentation: Optional[TypeMappingDocumentation] + plugin_metadata: DebputyPluginMetadata + + +class PackageDataTable: + def __init__(self, package_data_table: Mapping[str, "BinaryPackageData"]) -> None: + self._package_data_table = package_data_table + # This is enabled for metadata-detectors. But it is deliberate not enabled for package processors, + # because it is not clear how it should interact with dependencies. For metadata-detectors, things + # read-only and there are no dependencies, so we cannot "get them wrong". 
+ self.enable_cross_package_checks = False + + def __iter__(self) -> Iterator["BinaryPackageData"]: + return iter(self._package_data_table.values()) + + def __getitem__(self, item: str) -> "BinaryPackageData": + return self._package_data_table[item] + + def __contains__(self, item: str) -> bool: + return item in self._package_data_table + + +class PackageProcessingContextProvider(PackageProcessingContext): + __slots__ = ( + "_manifest", + "_binary_package", + "_related_udeb_package", + "_package_data_table", + "_cross_check_cache", + ) + + def __init__( + self, + manifest: "HighLevelManifest", + binary_package: BinaryPackage, + related_udeb_package: Optional[BinaryPackage], + package_data_table: PackageDataTable, + ) -> None: + self._manifest = manifest + self._binary_package = binary_package + self._related_udeb_package = related_udeb_package + self._package_data_table = ref(package_data_table) + self._cross_check_cache: Optional[ + Sequence[Tuple[BinaryPackage, "VirtualPath"]] + ] = None + + def _package_state_for( + self, + package: BinaryPackage, + ) -> "PackageTransformationDefinition": + return self._manifest.package_state_for(package.name) + + def _package_version_for( + self, + package: BinaryPackage, + ) -> str: + package_state = self._package_state_for(package) + version = package_state.binary_version + if version is not None: + return version + return self._manifest.source_version( + include_binnmu_version=not package.is_arch_all + ) + + @property + def binary_package(self) -> BinaryPackage: + return self._binary_package + + @property + def related_udeb_package(self) -> Optional[BinaryPackage]: + return self._related_udeb_package + + @property + def binary_package_version(self) -> str: + return self._package_version_for(self._binary_package) + + @property + def related_udeb_package_version(self) -> Optional[str]: + udeb = self._related_udeb_package + if udeb is None: + return None + return self._package_version_for(udeb) + + def 
accessible_package_roots(self) -> Iterable[Tuple[BinaryPackage, "VirtualPath"]]: + package_table = self._package_data_table() + if package_table is None: + raise ReferenceError( + "Internal error: package_table was garbage collected too early" + ) + if not package_table.enable_cross_package_checks: + raise PluginAPIViolationError( + "Cross package content checks are not available at this time." + ) + cache = self._cross_check_cache + if cache is None: + matches = [] + pkg = self.binary_package + for pkg_data in package_table: + if pkg_data.binary_package.name == pkg.name: + continue + res = package_cross_check_precheck(pkg, pkg_data.binary_package) + if not res[0]: + continue + matches.append((pkg_data.binary_package, pkg_data.fs_root)) + cache = tuple(matches) if matches else tuple() + self._cross_check_cache = cache + return cache + + +@dataclasses.dataclass(slots=True, frozen=True) +class PluginProvidedTrigger: + dpkg_trigger_type: DpkgTriggerType + dpkg_trigger_target: str + provider: DebputyPluginMetadata + provider_source_id: str + + def serialized_format(self) -> str: + return f"{self.dpkg_trigger_type} {self.dpkg_trigger_target}" diff --git a/src/debputy/plugin/api/plugin_parser.py b/src/debputy/plugin/api/plugin_parser.py new file mode 100644 index 0000000..ad2489f --- /dev/null +++ b/src/debputy/plugin/api/plugin_parser.py @@ -0,0 +1,66 @@ +from typing import NotRequired, List, Any, TypedDict + +from debputy.manifest_parser.base_types import ( + DebputyParsedContent, + OctalMode, + TypeMapping, +) +from debputy.manifest_parser.declarative_parser import ParserGenerator +from debputy.plugin.api.impl_types import KnownPackagingFileInfo + + +class PPFReferenceDocumentation(TypedDict): + description: NotRequired[str] + format_documentation_uris: NotRequired[List[str]] + + +class PackagerProvidedFileJsonDescription(DebputyParsedContent): + stem: str + installed_path: str + default_mode: NotRequired[OctalMode] + default_priority: NotRequired[int] + 
allow_name_segment: NotRequired[bool] + allow_architecture_segment: NotRequired[bool] + reference_documentation: NotRequired[PPFReferenceDocumentation] + + +class ManifestVariableJsonDescription(DebputyParsedContent): + name: str + value: str + reference_documentation: NotRequired[str] + + +class PluginJsonMetadata(DebputyParsedContent): + api_compat_version: int + module: NotRequired[str] + plugin_initializer: NotRequired[str] + packager_provided_files: NotRequired[List[Any]] + manifest_variables: NotRequired[List[Any]] + known_packaging_files: NotRequired[List[Any]] + + +def _initialize_plugin_metadata_parser_generator() -> ParserGenerator: + pc = ParserGenerator() + pc.register_mapped_type( + TypeMapping( + OctalMode, + str, + lambda v, ap, _: OctalMode.parse_filesystem_mode(v, ap), + ) + ) + return pc + + +PLUGIN_METADATA_PARSER_GENERATOR = _initialize_plugin_metadata_parser_generator() +PLUGIN_METADATA_PARSER = PLUGIN_METADATA_PARSER_GENERATOR.parser_from_typed_dict( + PluginJsonMetadata +) +PLUGIN_PPF_PARSER = PLUGIN_METADATA_PARSER_GENERATOR.parser_from_typed_dict( + PackagerProvidedFileJsonDescription +) +PLUGIN_MANIFEST_VARS_PARSER = PLUGIN_METADATA_PARSER_GENERATOR.parser_from_typed_dict( + ManifestVariableJsonDescription +) +PLUGIN_KNOWN_PACKAGING_FILES_PARSER = ( + PLUGIN_METADATA_PARSER_GENERATOR.parser_from_typed_dict(KnownPackagingFileInfo) +) diff --git a/src/debputy/plugin/api/spec.py b/src/debputy/plugin/api/spec.py new file mode 100644 index 0000000..d034a28 --- /dev/null +++ b/src/debputy/plugin/api/spec.py @@ -0,0 +1,1743 @@ +import contextlib +import dataclasses +import os +import tempfile +import textwrap +from typing import ( + Iterable, + Optional, + Callable, + Literal, + Union, + Iterator, + overload, + FrozenSet, + Sequence, + TypeVar, + Any, + TYPE_CHECKING, + TextIO, + BinaryIO, + Generic, + ContextManager, + List, + Type, + Tuple, +) + +from debian.substvars import Substvars + +from debputy import util +from debputy.exceptions import 
TestPathWithNonExistentFSPathError, PureVirtualPathError +from debputy.interpreter import Interpreter, extract_shebang_interpreter_from_file +from debputy.manifest_parser.util import parse_symbolic_mode +from debputy.packages import BinaryPackage +from debputy.types import S + +if TYPE_CHECKING: + from debputy.manifest_parser.base_types import ( + StaticFileSystemOwner, + StaticFileSystemGroup, + ) + + +PluginInitializationEntryPoint = Callable[["DebputyPluginInitializer"], None] +MetadataAutoDetector = Callable[ + ["VirtualPath", "BinaryCtrlAccessor", "PackageProcessingContext"], None +] +PackageProcessor = Callable[["VirtualPath", None, "PackageProcessingContext"], None] +DpkgTriggerType = Literal[ + "activate", + "activate-await", + "activate-noawait", + "interest", + "interest-await", + "interest-noawait", +] +Maintscript = Literal["postinst", "preinst", "prerm", "postrm"] +PackageTypeSelector = Union[Literal["deb", "udeb"], Iterable[Literal["deb", "udeb"]]] +ServiceUpgradeRule = Literal[ + "do-nothing", + "reload", + "restart", + "stop-then-start", +] + +DSD = TypeVar("DSD") +ServiceDetector = Callable[ + ["VirtualPath", "ServiceRegistry[DSD]", "PackageProcessingContext"], + None, +] +ServiceIntegrator = Callable[ + [ + Sequence["ServiceDefinition[DSD]"], + "BinaryCtrlAccessor", + "PackageProcessingContext", + ], + None, +] + +PMT = TypeVar("PMT") + + +@dataclasses.dataclass(slots=True, frozen=True) +class PackagerProvidedFileReferenceDocumentation: + description: Optional[str] = None + format_documentation_uris: Sequence[str] = tuple() + + def replace(self, **changes: Any) -> "PackagerProvidedFileReferenceDocumentation": + return dataclasses.replace(self, **changes) + + +def packager_provided_file_reference_documentation( + *, + description: Optional[str] = None, + format_documentation_uris: Optional[Sequence[str]] = tuple(), +) -> PackagerProvidedFileReferenceDocumentation: + """Provide documentation for a given packager provided file. 
+ + :param description: Textual description presented to the user. + :param format_documentation_uris: A sequence of URIs to documentation that describes + the format of the file. Most relevant first. + :return: + """ + uris = tuple(format_documentation_uris) if format_documentation_uris else tuple() + return PackagerProvidedFileReferenceDocumentation( + description=description, + format_documentation_uris=uris, + ) + + +class PathMetadataReference(Generic[PMT]): + """An accessor to plugin provided metadata + + This is a *short-lived* reference to a piece of metadata. It should *not* be stored beyond + the boundaries of the current plugin execution context as it can be become invalid (as an + example, if the path associated with this path is removed, then this reference become invalid) + """ + + @property + def is_present(self) -> bool: + """Determine whether the value has been set + + If the current plugin cannot access the value, then this method unconditionally returns + `False` regardless of whether the value is there. + + :return: `True` if the value has been set to a not None value (and not been deleted). + Otherwise, this property is `False`. + """ + raise NotImplementedError + + @property + def can_read(self) -> bool: + """Test whether it is possible to read the metadata + + Note: That the metadata being readable does *not* imply that the metadata is present. + + :return: True if it is possible to read the metadata. This is always True for the + owning plugin. + """ + raise NotImplementedError + + @property + def can_write(self) -> bool: + """Test whether it is possible to update the metadata + + :return: True if it is possible to update the metadata. + """ + raise NotImplementedError + + @property + def value(self) -> Optional[PMT]: + """Fetch the currently stored value if present. + + :return: The value previously stored if any. Returns `None` if the value was never + stored, explicitly set to `None` or was deleted. 
+ """ + raise NotImplementedError + + @value.setter + def value(self, value: Optional[PMT]) -> None: + """Replace any current value with the provided value + + This operation is only possible if the path is writable *and* the caller is from + the owning plugin OR the owning plugin made the reference read-write. + """ + raise NotImplementedError + + @value.deleter + def value(self) -> None: + """Delete any current value. + + This has the same effect as setting the value to `None`. It has the same restrictions + as the value setter. + """ + self.value = None + + +@dataclasses.dataclass(slots=True) +class PathDef: + path_name: str + mode: Optional[int] = None + mtime: Optional[int] = None + has_fs_path: Optional[bool] = None + fs_path: Optional[str] = None + link_target: Optional[str] = None + content: Optional[str] = None + materialized_content: Optional[str] = None + + +def virtual_path_def( + path_name: str, + /, + mode: Optional[int] = None, + mtime: Optional[int] = None, + fs_path: Optional[str] = None, + link_target: Optional[str] = None, + content: Optional[str] = None, + materialized_content: Optional[str] = None, +) -> PathDef: + """Define a virtual path for use with examples or, in tests, `build_virtual_file_system` + + :param path_name: The full path. Must start with "./". If it ends with "/", the path will be interpreted + as a directory (the `is_dir` attribute will be True). Otherwise, it will be a symlink or file depending + on whether a `link_target` is provided. + :param mode: The mode to use for this path. Defaults to 0644 for files and 0755 for directories. The mode + should be None for symlinks. + :param mtime: Define the last modified time for this path. If not provided, debputy will provide a default + if the mtime attribute is accessed. + :param fs_path: Define a file system path for this path. This causes `has_fs_path` to return True and the + `fs_path` attribute will return this value. 
The test is required to make this path available to the extent + required. Note that the virtual file system will *not* examine the provided path in any way nor attempt + to resolve defaults from the path. + :param link_target: A target for the symlink. Providing a not None value for this parameter will make the + path a symlink. + :param content: The content of the path (if opened). The path must be a file. + :param materialized_content: Same as `content` except `debputy` will put the contents into a physical file + as needed. Cannot be used with `content` or `fs_path`. + :return: An *opaque* object to be passed to `build_virtual_file_system`. While the exact type is provided + to aid with typing, the type name and its behaviour is not part of the API. + """ + + is_dir = path_name.endswith("/") + is_symlink = link_target is not None + + if is_symlink: + if mode is not None: + raise ValueError( + f'Please do not provide mode for symlinks. Triggered by "{path_name}"' + ) + if is_dir: + raise ValueError( + "Path name looks like a directory, but a symlink target was also provided." + f' Please remove the trailing slash OR the symlink_target. Triggered by "{path_name}"' + ) + + if content and (is_dir or is_symlink): + raise ValueError( + "Content was defined however, the path appears to be a directory a or a symlink" + f' Please remove the content, the trailing slash OR the symlink_target. Triggered by "{path_name}"' + ) + + if materialized_content is not None: + if content is not None: + raise ValueError( + "The materialized_content keyword is mutually exclusive with the content keyword." + f' Triggered by "{path_name}"' + ) + if fs_path is not None: + raise ValueError( + "The materialized_content keyword is mutually exclusive with the fs_path keyword." 
+ f' Triggered by "{path_name}"' + ) + return PathDef( + path_name, + mode=mode, + mtime=mtime, + has_fs_path=bool(fs_path) or materialized_content is not None, + fs_path=fs_path, + link_target=link_target, + content=content, + materialized_content=materialized_content, + ) + + +class PackageProcessingContext: + """Context for auto-detectors of metadata and package processors (no instantiation) + + This object holds some context related data for the metadata detector or/and package + processors. It may receive new attributes in the future. + """ + + __slots__ = () + + @property + def binary_package(self) -> BinaryPackage: + """The binary package stanza from `debian/control`""" + raise NotImplementedError + + @property + def binary_package_version(self) -> str: + """The version of the binary package + + Note this never includes the binNMU version for arch:all packages, but it may for arch:any. + """ + raise NotImplementedError + + @property + def related_udeb_package(self) -> Optional[BinaryPackage]: + """An udeb related to this binary package (if any)""" + raise NotImplementedError + + @property + def related_udeb_package_version(self) -> Optional[str]: + """The version of the related udeb package (if present) + + Note this never includes the binNMU version for arch:all packages, but it may for arch:any. 
+ """ + raise NotImplementedError + + def accessible_package_roots(self) -> Iterable[Tuple[BinaryPackage, "VirtualPath"]]: + raise NotImplementedError + + # """The source package stanza from `debian/control`""" + # source_package: SourcePackage + + +class DebputyPluginInitializer: + __slots__ = () + + def packager_provided_file( + self, + stem: str, + installed_path: str, + *, + default_mode: int = 0o0644, + default_priority: Optional[int] = None, + allow_name_segment: bool = True, + allow_architecture_segment: bool = False, + post_formatting_rewrite: Optional[Callable[[str], str]] = None, + packageless_is_fallback_for_all_packages: bool = False, + reservation_only: bool = False, + reference_documentation: Optional[ + PackagerProvidedFileReferenceDocumentation + ] = None, + ) -> None: + """Register a packager provided file (debian/<pkg>.foo) + + Register a packager provided file that debputy should automatically detect and install for the + packager (example `debian/foo.tmpfiles` -> `debian/foo/usr/lib/tmpfiles.d/foo.conf`). A packager + provided file typically identified by a package prefix and a "stem" and by convention placed + in the `debian/` directory. + + Like debhelper, debputy also supports the `foo.bar.tmpfiles` variant where the file is to be + installed into the `foo` package but be named after the `bar` segment rather than the package name. + This feature can be controlled via the `allow_name_segment` parameter. + + :param stem: The "stem" of the file. This would be the `tmpfiles` part of `debian/foo.tmpfiles`. + Note that this value must be unique across all registered packager provided files. + :param installed_path: A format string describing where the file should be installed. Would be + `/usr/lib/tmpfiles.d/{name}.conf` from the example above. + + The caller should provide a string with one or more of the placeholders listed below (usually `{name}` + should be one of them). The format affect the entire path. 
+ + The following placeholders are supported: + * `{name}` - The name in the name segment (defaulting the package name if no name segment is given) + * `{priority}` / `{priority:02}` - The priority of the file. Only provided priorities are used (that + is, default_priority is not None). The latter variant ensuring that the priority takes at least + two characters and the `0` character is left-padded for priorities that takes less than two + characters. + * `{owning_package}` - The name of the package. Should only be used when `{name}` alone is insufficient. + If you do not want the "name" segment in the first place, use `allow_name_segment=False` instead. + + The path is always interpreted as relative to the binary package root. + + :param default_mode: The mode the installed file should have by default. Common options are 0o0644 (the default) + or 0o0755 (for files that must be executable). + :param allow_architecture_segment: If True, the file may have an optional "architecture" segment at the end + (`foo.tmpfiles.amd64`), which marks it architecture specific. When False, debputy will detect the + "architecture" segment and report the use as an error. Note the architecture segment is only allowed for + arch:any packages. If a file targeting an arch:all package uses an architecture specific file it will + always result in an error. + :param allow_name_segment: If True, the file may have an optional "name" segment after the package name prefix. + (`foo.<name-here>.tmpfiles`). When False, debputy will detect the "name" segment and report the use as an + error. + :param default_priority: Special-case option for packager files that are installed into directories that have + "parse ordering" or "priority". These files will generally be installed as something like `20-foo.conf` + where the `20-` denotes their "priority". If the plugin is registering such a file type, then it should + provide a default priority. 
+ + The following placeholders are supported: + * `{name}` - The name in the name segment (defaulting the package name if no name segment is given) + * `{priority}` - The priority of the file. Only provided priorities are used (that is, default_priority + is not None) + * `{owning_package}` - The name of the package. Should only be used when `{name}` alone is insufficient. + If you do not want the "name" segment in the first place, use `allow_name_segment=False` instead. + :param post_formatting_rewrite: An optional "name correcting" callback. It receives the formatted name and can + do any transformation required. The primary use-case for this is to replace "forbidden" characters. The most + common case for debputy itself is to replace "." with "_" for tools that refuse to work with files containing + "." (`lambda x: x.replace(".", "_")`). The callback operates on basename of formatted version of the + `installed_path` and the callback should return the basename. + :param packageless_is_fallback_for_all_packages: If True, the packageless variant (such as, `debian/changelog`) + is a fallback for every package. + :param reference_documentation: Reference documentation for the packager provided file. Use the + packager_provided_file_reference_documentation function to provide the value for this parameter. + :param reservation_only: When True, tell debputy that the plugin reserves this packager provided file, but that + debputy should not actually install it automatically. This is useful in the cases, where the plugin + needs to process the file before installing it. The file will be marked as provided by this plugin. This + enables introspection and detects conflicts if other plugins attempts to claim the file. 
+ """ + raise NotImplementedError + + def metadata_or_maintscript_detector( + self, + auto_detector_id: str, + auto_detector: MetadataAutoDetector, + *, + package_type: PackageTypeSelector = "deb", + ) -> None: + """Provide a pre-assembly hook that can affect the metadata/maintscript of binary ("deb") packages + + The provided hook will be run once per binary package to be assembled, and it can see all the content + ("data.tar") planned to be included in the deb. The hook may do any *read-only* analysis of this content + and provide metadata, alter substvars or inject maintscript snippets. However, the hook must *not* + change the content ("data.tar") part of the deb. + + The hook will be run unconditionally for all binary packages built. When the hook does not apply to all + packages, it must provide its own (internal) logic for detecting whether it is relevant and reduced itself + to a no-op if it should not apply to the current package. + + Hooks are run in "some implementation defined order" and should not rely on being run before or after + any other hook. + + The hooks are only applied to packages defined in `debian/control`. Notably, the metadata detector will + not apply to auto-generated `-dbgsym` packages (as those are not listed explicitly in `debian/control`). + + :param auto_detector_id: A plugin-wide unique ID for this detector. Packagers may use this ID for disabling + the detector and accordingly the ID is part of the plugin's API toward the packager. + :param auto_detector: The code to be called that will be run at the metadata generation state (once for each + binary package). + :param package_type: Which kind of packages this metadata detector applies to. The package type is generally + defined by `Package-Type` field in the binary package. The default is to only run for regular `deb` packages + and ignore `udeb` packages. 
+ """ + raise NotImplementedError + + def manifest_variable( + self, + variable_name: str, + value: str, + variable_reference_documentation: Optional[str] = None, + ) -> None: + """Provide a variable that can be used in the package manifest + + >>> # Enable users to use "{{path:BASH_COMPLETION_DIR}}/foo" in their manifest. + >>> api.manifest_variable( # doctest: +SKIP + ... "path:BASH_COMPLETION_DIR", + ... "/usr/share/bash-completion/completions", + ... variable_reference_documentation="Directory to install bash completions into", + ... ) + + :param variable_name: The variable name. + :param value: The value the variable should resolve to. + :param variable_reference_documentation: A short snippet of reference documentation that explains + the purpose of the variable. + """ + raise NotImplementedError + + +class MaintscriptAccessor: + __slots__ = () + + def on_configure( + self, + run_snippet: str, + /, + indent: Optional[bool] = None, + perform_substitution: bool = True, + skip_on_rollback: bool = False, + ) -> None: + """Provide a snippet to be run when the package is about to be "configured" + + This condition is the most common "post install" condition and covers the two + common cases: + * On initial install, OR + * On upgrade + + In dpkg maintscript terms, this method roughly corresponds to postinst containing + `if [ "$1" = configure ]; then <snippet>; fi` + + Additionally, the condition will by default also include rollback/abort scenarios such as "abort-remove", + which is normally what you want but most people forget about. + + :param run_snippet: The actual shell snippet to be run in the given condition. The snippet must be idempotent. + The snippet may contain newlines as necessary, which will make the result more readable. Additionally, the + snippet may contain '{{FOO}}' substitutions by default. + :param skip_on_rollback: By default, this condition will also cover common rollback scenarios.
This + is normally what you want (or benign in most cases due to the idempotence requirement for maintscripts). + However, you can disable the rollback cases, leaving only "On initial install OR On upgrade". + :param indent: If True, the provided snippet will be indented to fit the condition provided by debputy. + In most cases, this is safe to do and provides more readable scripts. However, it may cause issues + with some special shell syntax (such as "Heredocs"). When False, the snippet will *not* be re-indented. + You are recommended to do 4 spaces of indentation when indent is False for readability. + :param perform_substitution: When True, `{{FOO}}` will be substituted in the snippet. When False, no + substitution is provided. + """ + raise NotImplementedError + + def on_initial_install( + self, + run_snippet: str, + /, + indent: Optional[bool] = None, + perform_substitution: bool = True, + ) -> None: + """Provide a snippet to be run when the package is about to be "configured" for the first time + + The snippet will only be run the first time the package is installed (ever or since last purge). + Note that "first" does not mean "exactly once" as dpkg does *not* provide such semantics. There are two + common cases where this snippet can be run multiple times for the same system (and why the snippet + must still be idempotent): + + 1) The package is installed (1), then purged and then installed again (2). This can partly be mitigated + by having an `on_purge` script to do clean up. + + 2) As the package is installed, the `postinst` script terminates prematurely (Disk full, power loss, etc.). + The user resolves the problem and runs `dpkg --configure <pkg>`, which in turn restarts the script + from the beginning. This is why scripts must be idempotent in general.
+ + In dpkg maintscript terms, this method roughly corresponds to postinst containing + `if [ "$1" = configure ] && [ -z "$2" ]; then <snippet>; fi` + + :param run_snippet: The actual shell snippet to be run in the given condition. The snippet must be idempotent. + The snippet may contain newlines as necessary, which will make the result more readable. Additionally, the + snippet may contain '{{FOO}}' substitutions by default. + :param indent: If True, the provided snippet will be indented to fit the condition provided by debputy. + In most cases, this is safe to do and provides more readable scripts. However, it may cause issues + with some special shell syntax (such as "Heredocs"). When False, the snippet will *not* be re-indented. + You are recommended to do 4 spaces of indentation when indent is False for readability. + :param perform_substitution: When True, `{{FOO}}` will be substituted in the snippet. When False, no + substitution is provided. + """ + raise NotImplementedError + + def on_upgrade( + self, + run_snippet: str, + /, + indent: Optional[bool] = None, + perform_substitution: bool = True, + ) -> None: + """Provide a snippet to be run when the package is about to be "configured" after an upgrade + + The snippet will only be run on any upgrade (that is, it will be skipped on the initial install). + + In dpkg maintscript terms, this method roughly corresponds to postinst containing + `if [ "$1" = configure ] && [ -n "$2" ]; then <snippet>; fi` + + :param run_snippet: The actual shell snippet to be run in the given condition. The snippet must be idempotent. + The snippet may contain newlines as necessary, which will make the result more readable. Additionally, the + snippet may contain '{{FOO}}' substitutions by default. + :param indent: If True, the provided snippet will be indented to fit the condition provided by debputy. + In most cases, this is safe to do and provides more readable scripts. 
However, it may cause issues + with some special shell syntax (such as "Heredocs"). When False, the snippet will *not* be re-indented. + You are recommended to do 4 spaces of indentation when indent is False for readability. + :param perform_substitution: When True, `{{FOO}}` will be substituted in the snippet. When False, no + substitution is provided. + """ + raise NotImplementedError + + def on_upgrade_from( + self, + version: str, + run_snippet: str, + /, + indent: Optional[bool] = None, + perform_substitution: bool = True, + ) -> None: + """Provide a snippet to be run when the package is about to be "configured" after an upgrade from a given version + + The snippet will only be run on any upgrade (that is, it will be skipped on the initial install). + + In dpkg maintscript terms, this method roughly corresponds to postinst containing + `if [ "$1" = configure ] && dpkg --compare-versions "$2" le-nl "<version>" ; then <snippet>; fi` + + :param version: The version to upgrade from + :param run_snippet: The actual shell snippet to be run in the given condition. The snippet must be idempotent. + The snippet may contain newlines as necessary, which will make the result more readable. Additionally, the + snippet may contain '{{FOO}}' substitutions by default. + :param indent: If True, the provided snippet will be indented to fit the condition provided by debputy. + In most cases, this is safe to do and provides more readable scripts. However, it may cause issues + with some special shell syntax (such as "Heredocs"). When False, the snippet will *not* be re-indented. + You are recommended to do 4 spaces of indentation when indent is False for readability. + :param perform_substitution: When True, `{{FOO}}` will be substituted in the snippet. When False, no + substitution is provided.
+ """ + raise NotImplementedError + + def on_before_removal( + self, + run_snippet: str, + /, + indent: Optional[bool] = None, + perform_substitution: bool = True, + ) -> None: + """Provide a snippet to be run when the package is about to be removed + + The snippet will be run before dpkg removes any files. + + In dpkg maintscript terms, this method roughly corresponds to prerm containing + `if [ "$1" = remove ] ; then <snippet>; fi` + + :param run_snippet: The actual shell snippet to be run in the given condition. The snippet must be idempotent. + The snippet may contain newlines as necessary, which will make the result more readable. Additionally, the + snippet may contain '{{FOO}}' substitutions by default. + :param indent: If True, the provided snippet will be indented to fit the condition provided by debputy. + In most cases, this is safe to do and provides more readable scripts. However, it may cause issues + with some special shell syntax (such as "Heredocs"). When False, the snippet will *not* be re-indented. + You are recommended to do 4 spaces of indentation when indent is False for readability. + :param perform_substitution: When True, `{{FOO}}` will be substituted in the snippet. When False, no + substitution is provided. + """ + raise NotImplementedError + + def on_removed( + self, + run_snippet: str, + /, + indent: Optional[bool] = None, + perform_substitution: bool = True, + ) -> None: + """Provide a snippet to be run when the package has been removed + + The snippet will be run after dpkg removes the package content from the file system. + + **WARNING**: The snippet *cannot* rely on dependencies and must rely on `Essential: yes` packages. + + In dpkg maintscript terms, this method roughly corresponds to postrm containing + `if [ "$1" = remove ] ; then <snippet>; fi` + + :param run_snippet: The actual shell snippet to be run in the given condition. The snippet must be idempotent. 
+ The snippet may contain newlines as necessary, which will make the result more readable. Additionally, the + snippet may contain '{{FOO}}' substitutions by default. + :param indent: If True, the provided snippet will be indented to fit the condition provided by debputy. + In most cases, this is safe to do and provides more readable scripts. However, it may cause issues + with some special shell syntax (such as "Heredocs"). When False, the snippet will *not* be re-indented. + You are recommended to do 4 spaces of indentation when indent is False for readability. + :param perform_substitution: When True, `{{FOO}}` will be substituted in the snippet. When False, no + substitution is provided. + """ + raise NotImplementedError + + def on_purge( + self, + run_snippet: str, + /, + indent: Optional[bool] = None, + perform_substitution: bool = True, + ) -> None: + """Provide a snippet to be run when the package is being purged. + + The snippet will be run when the package is purged from the system. + + **WARNING**: The snippet *cannot* rely on dependencies and must rely only on `Essential: yes` packages. + + In dpkg maintscript terms, this method roughly corresponds to postrm containing + `if [ "$1" = purge ] ; then <snippet>; fi` + + :param run_snippet: The actual shell snippet to be run in the given condition. The snippet must be idempotent. + The snippet may contain newlines as necessary, which will make the result more readable. Additionally, the + snippet may contain '{{FOO}}' substitutions by default. + :param indent: If True, the provided snippet will be indented to fit the condition provided by debputy. + In most cases, this is safe to do and provides more readable scripts. However, it may cause issues + with some special shell syntax (such as "Heredocs"). When False, the snippet will *not* be re-indented. + You are recommended to do 4 spaces of indentation when indent is False for readability.
+ :param perform_substitution: When True, `{{FOO}}` will be substituted in the snippet. When False, no + substitution is provided. + """ + raise NotImplementedError + + def unconditionally_in_script( + self, + maintscript: Maintscript, + run_snippet: str, + /, + perform_substitution: bool = True, + ) -> None: + """Provide a snippet to be run in a given script + + Run a given snippet unconditionally from a given script. The snippet must contain its own conditional + for when it should be run. + + :param maintscript: The maintscript to insert the snippet into. + :param run_snippet: The actual shell snippet to be run. The snippet will be run unconditionally and should + contain its own conditions as necessary. The snippet must be idempotent. The snippet may contain newlines + as necessary, which will make the result more readable. Additionally, the snippet may contain '{{FOO}}' + substitutions by default. + :param perform_substitution: When True, `{{FOO}}` will be substituted in the snippet. When False, no + substitution is provided. + """ + raise NotImplementedError + + def escape_shell_words(self, *args: str) -> str: + """Provide sh-shell escape of strings + + `assert escape_shell("foo", "fu bar", "baz") == 'foo "fu bar" baz'` + + This is useful for ensuring file names and other "input" are considered one parameter even when they + contain spaces or shell meta-characters. + + :param args: The string(s) to be escaped. + :return: Each argument escaped such that each argument becomes a single "word" and then all these words are + joined by a single space. + """ + return util.escape_shell(*args) + + +class BinaryCtrlAccessor: + __slots__ = () + + def dpkg_trigger(self, trigger_type: DpkgTriggerType, trigger_target: str) -> None: + """Register a declarative dpkg level trigger + + The provided trigger will be added to the package's metadata (the triggers file of the control.tar). 
+ + If the trigger has already been added previously, a second call with the same trigger data will be ignored. + """ + raise NotImplementedError + + @property + def maintscript(self) -> MaintscriptAccessor: + """Attribute for manipulating maintscripts""" + raise NotImplementedError + + @property + def substvars(self) -> "FlushableSubstvars": + """Attribute for manipulating dpkg substvars (deb-substvars)""" + raise NotImplementedError + + +class VirtualPath: + __slots__ = () + + @property + def name(self) -> str: + """Basename of the path a.k.a. last segment of the path + + In a path "usr/share/doc/pkg/changelog.gz" the basename is "changelog.gz". + + For a directory, the basename *never* ends with a `/`. + """ + raise NotImplementedError + + @property + def iterdir(self) -> Iterable["VirtualPath"]: + """Returns an iterable that iterates over all children of this path + + For directories, this returns an iterable of all children. For non-directories, + the iterable is always empty. + """ + raise NotImplementedError + + def lookup(self, path: str) -> Optional["VirtualPath"]: + """Perform a path lookup relative to this path + + As an example `doc_dir = fs_root.lookup('./usr/share/doc')` + + If the provided path starts with `/`, then the lookup is performed relative to the + file system root. That is, you can assume the following to always be True: + + `fs_root.lookup("usr") == any_path_beneath_fs_root.lookup('/usr')` + + Note: This method requires the path to be attached (see `is_detached`) regardless of + whether the lookup is relative or absolute. + + If the path traverses a symlink, the symlink will be resolved. + + :param path: The path to look up. Can contain "." and ".." segments. If starting with `/`, + the lookup is performed relative to the file system root, otherwise the lookup is relative + to this path. + :return: The path object for the desired path if it can be found. Otherwise, None.
+ """ + raise NotImplementedError + + def all_paths(self) -> Iterable["VirtualPath"]: + """Iterate over this path and all of its descendants (if any) + + If used on the root path, then every path in the package is returned. + + The iterable is ordered, so using the order in output will be produce + bit-for-bit reproducible output. Additionally, a directory will always + be seen before its descendants. Otherwise, the order is implementation + defined. + + The iteration is lazy and as a side effect do account for some obvious + mutation. Like if the current path is removed, then none of its children + will be returned (provided mutation happens before the lazy iteration + was required to resolve it). Likewise, mutation of the directory will + also work (again, provided mutation happens before the lazy iteration order). + + :return: An ordered iterable of this path followed by its descendants. + """ + raise NotImplementedError + + @property + def is_detached(self) -> bool: + """Returns True if this path is detached + + Paths that are detached from the file system will not be present in the package and + most operations are unsafe on them. This usually only happens if the path or one of + its parent directories are unlinked (rm'ed) from the file system tree. + + All paths are attached by default and will only become detached as a result of + an action to mutate the virtual file system. Note that the file system may not + always be manipulated. + + :return: True if the entry is detached. Detached entries should be discarded, so they + can be garbage collected. + """ + raise NotImplementedError + + # The __getitem__ behaves like __getitem__ from Dict but __iter__ would ideally work like a Sequence. + # However, that does not feel compatible, so lets force people to use .children instead for the Sequence + # behaviour to avoid surprises for now. 
+ # (Maybe it is a non-issue, but it is easier to add the API later than to remove it once we have committed + # to using it) + __iter__ = None + + def __getitem__(self, key: object) -> "VirtualPath": + """Lookup a (direct) child by name + + Ignoring the possible `KeyError`, then the following are the same: + `fs_root["usr"] == fs_root.lookup('usr')` + + Note that unlike `.lookup` this can only locate direct children. + """ + raise NotImplementedError + + def __delitem__(self, key) -> None: + """Remove a child from this node if it exists + + If that child is a directory, then the entire tree is removed (like `rm -fr`). + """ + raise NotImplementedError + + def get(self, key: str) -> "Optional[VirtualPath]": + """Lookup a (direct) child by name + + The following are the same: + `fs_root.get("usr") == fs_root.lookup('usr')` + + Note that unlike `.lookup` this can only locate direct children. + """ + try: + return self[key] + except KeyError: + return None + + def __contains__(self, item: object) -> bool: + """Determine if this path includes a given child (either by object or string) + + Examples: + + if 'foo' in dir: ... + """ + if isinstance(item, VirtualPath): + return item.parent_dir is self + if not isinstance(item, str): + return False + m = self.get(item) + return m is not None + + @property + def path(self) -> str: + """Returns the "full" path for this file system entry + + This is the path that debputy uses to refer to this file system entry. It is always + normalized. Use the `absolute` attribute for how the path looks + when the package is installed. Alternatively, there is also `fs_path`, which is the + path to the underlying file system object (assuming there is one). That is the one + you need if you want to read the file. + + This attribute is mostly useful for debugging or for looking up the path relative + to the "root" of the virtual file system that debputy maintains.
+ + If the path is detached (see `is_detached`), then this method returns the path as it + was known prior to being detached. + """ + raise NotImplementedError + + @property + def absolute(self) -> str: + """Returns the absolute version of this path + + This is how to refer to this path when the package is installed. + + If the path is detached (see `is_detached`), then this method returns the last known location + of installation (prior to being detached). + + :return: The absolute path of this file as it would be on the installed system. + """ + p = self.path.lstrip(".") + if not p.startswith("/"): + return f"/{p}" + return p + + @property + def parent_dir(self) -> Optional["VirtualPath"]: + """The parent directory of this path + + Note this operation requires the path is "attached" (see `is_detached`). All paths are attached + by default but unlinking paths will cause them to become detached. + + :return: The parent path or None for the root. + """ + raise NotImplementedError + + def stat(self) -> os.stat_result: + """Attempt to do stat of the underlying path (if it exists) + + *Avoid* using `stat()` whenever possible where a more specialized attribute exist. The + `stat()` call returns the data from the file system and often, `debputy` does *not* track + its state in the file system. As an example, if you want to know the file system mode of + a path, please use the `mode` attribute instead. + + This never follow symlinks (it behaves like `os.lstat`). It will raise an error + if the path is not backed by a file system object (that is, `has_fs_path` is False). + + :return: The stat result or an error. + """ + raise NotImplementedError() + + @property + def size(self) -> int: + """Resolve the file size (`st_size`) + + This may be using `stat()` and therefore `fs_path`. 
+ + :return: The size of the file in bytes + """ + return self.stat().st_size + + @property + def mode(self) -> int: + """Determine the mode bits of this path object + + Note that: + * like with `stat` above, this never follows symlinks. + * the mode returned by this method is not always a 1:1 with the mode in the + physical file system. As an optimization, `debputy` skips unnecessary writes + to the underlying file system in many cases. + + + :return: The mode bits for the path. + """ + raise NotImplementedError + + @mode.setter + def mode(self, new_mode: int) -> None: + """Set the octal file mode of this path + + Note that: + * this operation will fail if `path.is_read_write` returns False. + * this operation is generally *not* synced to the physical file system (as + an optimization). + + :param new_mode: The new octal mode for this path. Note that `debputy` insists + that all paths have the `user read bit` and, for directories also, the + `user execute bit`. The absence of these minimal mode bits causes hard to + debug errors. + """ + raise NotImplementedError + + @property + def is_executable(self) -> bool: + """Determine whether a path is considered executable + + Generally, this means that at least one executable bit is set. This will + basically always be true for directories as directories need the execute + parameter to be traversable. + + :return: True if the path is considered executable with its current mode + """ + return bool(self.mode & 0o0111) + + def chmod(self, new_mode: Union[int, str]) -> None: + """Set the file mode of this path + + This is similar to setting the `mode` attribute. However, this method accepts + a string argument, which will be parsed as a symbolic mode (example: `u+rX,go=rX`). + + Note that: + * this operation will fail if `path.is_read_write` returns False. + * this operation is generally *not* synced to the physical file system (as + an optimization). + + :param new_mode: The new mode for this path. 
+ Note that `debputy` insists that all paths have the `user read bit` and, for + directories also, the `user execute bit`. The absence of these minimal mode + bits causes hard to debug errors. + """ + if isinstance(new_mode, str): + segments = parse_symbolic_mode(new_mode, None) + final_mode = self.mode + is_dir = self.is_dir + for segment in segments: + final_mode = segment.apply(final_mode, is_dir) + self.mode = final_mode + else: + self.mode = new_mode + + def chown( + self, + owner: Optional["StaticFileSystemOwner"], + group: Optional["StaticFileSystemGroup"], + ) -> None: + """Change the owner/group of this path + + :param owner: The desired owner definition for this path. If None, then no change of owner is performed. + :param group: The desired group definition for this path. If None, then no change of group is performed. + """ + raise NotImplementedError + + @property + def mtime(self) -> float: + """Determine the mtime of this path object + + Note that: + * like with `stat` above, this never follows symlinks. + * the mtime returned has *not* been clamped against `SOURCE_DATE_EPOCH`. Timestamp + normalization is handled later by `debputy`. + * the mtime returned by this method is not always a 1:1 with the mtime in the + physical file system. As an optimization, `debputy` skips unnecessary writes + to the underlying file system in many cases. + + :return: The mtime for the path. + """ + raise NotImplementedError + + @mtime.setter + def mtime(self, new_mtime: float) -> None: + """Set the mtime of this path + + Note that: + * this operation will fail if `path.is_read_write` returns False. + * this operation is generally *not* synced to the physical file system (as + an optimization). + + :param new_mtime: The new mtime of this path. Note that the caller does not need to + account for `SOURCE_DATE_EPOCH`. Timestamp normalization is handled later.
+ """ + raise NotImplementedError + + def readlink(self) -> str: + """Determine the link target of this path assuming it is a symlink + + For paths where `is_symlink` is True, this already returns a link target even when + `has_fs_path` is False. + + :return: The link target of the path or an error if this is not a symlink + """ + raise NotImplementedError() + + @overload + def open( + self, + *, + byte_io: Literal[False] = False, + buffering: Optional[int] = ..., + ) -> TextIO: ... + + @overload + def open( + self, + *, + byte_io: Literal[True], + buffering: Optional[int] = ..., + ) -> BinaryIO: ... + + @overload + def open( + self, + *, + byte_io: bool, + buffering: Optional[int] = ..., + ) -> Union[TextIO, BinaryIO]: ... + + def open( + self, + *, + byte_io: bool = False, + buffering: int = -1, + ) -> Union[TextIO, BinaryIO]: + """Open the file for reading. Usually used with a context manager + + By default, the file is opened in text mode (utf-8). Binary mode can be requested + via the `byte_io` parameter. This operation is only valid for files (`is_file` returns + `True`). Usage on symlinks and directories will raise exceptions. + + This method *often* requires the `fs_path` to be present. However, tests as a notable + case can inject content without having the `fs_path` point to a real file. (To be clear, + such tests are generally expected to ensure `has_fs_path` returns `True`). + + + :param byte_io: If True, open the file in binary mode (like `rb` for `open`) + :param buffering: Same as open(..., buffering=...) where supported. Notably during + testing, the content may be purely in memory and use a BytesIO/StringIO + (which does not accept that parameter, but then is buffered in a different way) + :return: The file handle.
+ """ + + if not self.is_file: + raise TypeError(f"Cannot open {self.path} for reading: It is not a file") + + if byte_io: + return open(self.fs_path, "rb", buffering=buffering) + return open(self.fs_path, "rt", encoding="utf-8", buffering=buffering) + + @property + def fs_path(self) -> str: + """Request the underlying fs_path of this path + + Only available when `has_fs_path` is True. Generally this should only be used for files to read + the contents of the file and do some action based on the parsed result. + + The path should only be used for read-only purposes as debputy may assume that it is safe to have + multiple paths pointing to the same file system path. + + Note that: + * This is often *not* available for directories and symlinks. + * The debputy in-memory file system overrules the physical file system. Attempting to "fix" things + by using `os.chmod` or `os.unlink`'ing files, etc. will generally not do as you expect. Best case, + your actions are ignored and worst case it will cause the build to fail as it violates debputy's + internal invariants. + + :return: The path to the underlying file system object on the build system or an error if no such + file exists (see `has_fs_path`). + """ + raise NotImplementedError() + + @property + def is_dir(self) -> bool: + """Determine if this path is a directory + + Never follows symlinks. + + :return: True if this path is a directory. False otherwise. + """ + raise NotImplementedError() + + @property + def is_file(self) -> bool: + """Determine if this path is a regular file + + Never follows symlinks. + + :return: True if this path is a regular file. False otherwise. + """ + raise NotImplementedError() + + @property + def is_symlink(self) -> bool: + """Determine if this path is a symlink + + :return: True if this path is a symlink. False otherwise.
+ """ + raise NotImplementedError() + + @property + def has_fs_path(self) -> bool: + """Determine whether this path is backed by a file system path + + :return: True if this path is backed by a file system object on the build system. + """ + raise NotImplementedError() + + @property + def is_read_write(self) -> bool: + """When true, the file system entry may be mutated + + Read-write rules are: + + +--------------------------+-------------------+------------------------+ + | File system | From / Inside | Read-Only / Read-Write | + +--------------------------+-------------------+------------------------+ + | Source directory | Any context | Read-Only | + | Binary staging directory | Package Processor | Read-Write | + | Binary staging directory | Metadata Detector | Read-Only | + +--------------------------+-------------------+------------------------+ + + These rules apply to the virtual file system (`debputy` cannot enforce + these rules in the underlying file system). The `debputy` code relies + on these rules for its logic in multiple places to catch bugs and for + optimizations. + + As an example, the reason why the file system is read-only when Metadata + Detectors are run is that the contents of the file system have already + been committed. New files will not be included, removals of existing + files will trigger a hard error when the package is assembled, etc. + To avoid people spending hours debugging why their code does not work + as intended, `debputy` instead throws a hard error if you try to mutate + the file system when it is in read-only mode to "fail fast". + + :return: Whether file system mutations are permitted. + """ + return False + + def mkdir(self, name: str) -> "VirtualPath": + """Create a new subdirectory of the current path + + :param name: Basename of the new directory. The directory must not contain a path + with this basename.
+ :return: The new subdirectory + """ + raise NotImplementedError + + def mkdirs(self, path: str) -> "VirtualPath": + """Ensure a given path exists and is a directory. + + :param path: Path to the directory to create. Any parent directories will be + created as needed. If the path already exists and is a directory, then it + is returned. If any part of the path exists and that is not a directory, + then the `mkdirs` call will raise an error. + :return: The directory denoted by the given path + """ + raise NotImplementedError + + def add_file( + self, + name: str, + *, + unlink_if_exists: bool = True, + use_fs_path_mode: bool = False, + mode: int = 0o0644, + mtime: Optional[float] = None, + ) -> ContextManager["VirtualPath"]: + """Add a new regular file as a child of this path + + This method will insert a new file into the virtual file system as a child + of the current path (which must be a directory). The caller must use the + return value as a context manager (see example). During the life-cycle of + the managed context, the caller can fill out the contents of the file + from the new path's `fs_path` attribute. The `fs_path` will exist as an + empty file when the context manager is entered. + + Once the context manager exits, mutation of the `fs_path` is no longer permitted. + + >>> import subprocess + >>> path = ... # doctest: +SKIP + >>> with path.add_file("foo") as new_file, open(new_file.fs_path, "w") as fd: # doctest: +SKIP + ... fd.writelines(["Some", "Content", "Here"]) + + The caller can replace the provided `fs_path` entirely provided at the end result + (when the context manager exits) is a regular file with no hard links. + + Note that this operation will fail if `path.is_read_write` returns False. 
+ + :param name: Basename of the new file + :param unlink_if_exists: If the name was already in use, then either an exception is thrown + (when `unlink_if_exists` is False) or the path will be removed via `unlink(recursive=False)` + (when `unlink_if_exists` is True) + :param use_fs_path_mode: When True, the file created will have this mode in the physical file + system. When the context manager exits, `debputy` will refresh its mode to match the mode + in the physical file system. This is primarily useful if the caller uses a subprocess to + mutate the path and the file mode is relevant for this tool (either as input or output). + When the parameter is false, the new file is guaranteed to be readable and writable for + the current user. However, no other guarantees are given (not even that it matches the + `mode` parameter) and any changes to the mode in the physical file system will be ignored. + :param mode: This is the initial file mode. Note the `use_fs_path_mode` parameter for how + this interacts with the physical file system. + :param mtime: If the caller has a more accurate mtime than the mtime of the generated file, + then it can be provided here. Note that all mtimes will later be clamped based on + `SOURCE_DATE_EPOCH`. This parameter is only for when the conceptual mtime of this path + should be earlier than `SOURCE_DATE_EPOCH`. + :return: A Context manager that upon entering provides a `VirtualPath` instance for the + new file. The instance remains valid after the context manager exits (assuming it exits + successfully), but the file denoted by `fs_path` must not be changed after the context + manager exits + """ + raise NotImplementedError + + def replace_fs_path_content( + self, + *, + use_fs_path_mode: bool = False, + ) -> ContextManager[str]: + """Replace the contents of this file via inline manipulation + + Used as a context manager to provide the fs path for manipulation. + + Example: + >>> import subprocess + >>> path = ...
def add_symlink(self, link_name: str, link_target: str) -> "VirtualPath":
    """Add a new symlink as a child of this path

    This will create a new symlink inside the current path. If the path already exists,
    the existing path will be unlinked via `unlink(recursive=False)`.

    Note that this operation will fail if `path.is_read_write` returns False.

    :param link_name: The basename of the link file entry.
    :param link_target: The target of the link. Link target normalization will
      be handled by `debputy`, so the caller can use relative or absolute paths.
      (At the time of writing, symlink target normalization happens late)
    :return: The newly created symlink.
    """
    # Abstract in this API definition; the concrete file system classes provide the behaviour.
    raise NotImplementedError
class FlushableSubstvars(Substvars):
    """`Substvars` variant that can temporarily expose its content as a file

    Plugins are expected to use `flush()` rather than `save()` when they need the
    substvars on the file system; `save()` refuses to work when no substvars path
    has been set.
    """

    __slots__ = ()

    @contextlib.contextmanager
    def flush(self) -> Iterator[str]:
        """Temporarily write the substvars to a file and then re-read it again

        >>> s = FlushableSubstvars()
        >>> 'Test:Var' in s
        False
        >>> with s.flush() as name, open(name, 'wt', encoding='utf-8') as fobj:
        ...     _ = fobj.write('Test:Var=bar\\n')  # "_ = " is to ignore the return value of write
        >>> 'Test:Var' in s
        True

        Used as a context manager to define when the file is flushed and can be
        accessed via the file system. If the context terminates successfully, the
        file is read and its content replaces the current substvars.

        This is mostly useful if the plugin needs to interface with a third-party
        tool that requires a file as interprocess communication (IPC) for sharing
        the substvars.

        The file may be truncated or completely replaced (change inode) as long as
        the provided path points to a regular file when the context manager
        terminates successfully.

        Note that any manipulation of the substvars via the `Substvars` API while
        the file is flushed will silently be discarded if the context manager completes
        successfully.

        :return: A context manager that provides the path of the temporary file
          when entered.
        """
        with tempfile.NamedTemporaryFile(mode="w+t", encoding="utf-8") as tmp:
            self.write_substvars(tmp)
            tmp.flush()  # Tempting to use close, but then we have to manually delete the file.
            yield tmp.name
            # Re-open by name rather than seeking on `tmp`; seek did not work when last
            # tried (if you get it to work, feel free to convert back to seek - as long
            # as it works!). This only runs when the caller's block finished without an
            # exception, because an exception is re-raised at the `yield` above.
            with open(tmp.name, "rt", encoding="utf-8") as fd:
                self.read_substvars(fd)

    def save(self) -> None:
        """Save the substvars to their substvars path

        :raises TypeError: If no substvars path is set (`_substvars_path` is None).
        """
        # Promote the debputy extension over `save()` for the plugins.
        if self._substvars_path is None:
            raise TypeError(
                "Please use `flush()` extension to temporarily write the substvars to the file system"
            )
        super().save()
+ :param enable_by_default: Whether the service should be enabled by default, assuming the + packager does not explicitly override this setting. + :param start_by_default: Whether the service should be started by default on install, assuming + the packager does not explicitly override this setting. + :param default_upgrade_rule: The default value for how the service should be processed during + upgrades. Options are: + * `do-nothing`: The plugin should not interact with the running service (if any) + (maintenance of the enabled start, start on install, etc. are still applicable) + * `reload`: The plugin should attempt to reload the running service (if any). + Note: In combination with `auto_start_in_install == False`, be careful to not + start the service if not is not already running. + * `restart`: The plugin should attempt to restart the running service (if any). + Note: In combination with `auto_start_in_install == False`, be careful to not + start the service if not is not already running. + * `stop-then-start`: The plugin should stop the service during `prerm upgrade` + and start it against in the `postinst` script. + + :param service_context: Any custom data that the detector want to pass along to the + integrator for this service. + """ + raise NotImplementedError + + +@dataclasses.dataclass(slots=True, frozen=True) +class ParserAttributeDocumentation: + attributes: FrozenSet[str] + description: Optional[str] + + +def undocumented_attr(attr: str) -> ParserAttributeDocumentation: + """Describe an attribute as undocumented + + If you for some reason do not want to document a particular attribute, you can mark it as + undocumented. This is required if you are only documenting a subset of the attributes, + because `debputy` assumes any omission to be a mistake. 
def type_mapping_reference_documentation(
    *,
    description: Optional[str] = None,
    examples: Union[TypeMappingExample[S], Iterable[TypeMappingExample[S]]] = tuple(),
) -> TypeMappingDocumentation[S]:
    """Bundle a description and examples into a `TypeMappingDocumentation`

    :param description: Optional description of the type mapping.
    :param examples: Either a single `TypeMappingExample` or an iterable of them.
      A single example is wrapped so the result always holds a tuple of examples.
    :return: The documentation object holding the description and the examples.
    """
    if isinstance(examples, TypeMappingExample):
        # A lone example was given; normalize it to a one-element tuple.
        normalized_examples = (examples,)
    else:
        normalized_examples = tuple(examples)
    return TypeMappingDocumentation(
        description=description,
        examples=normalized_examples,
    )
def reference_documentation(
    title: str = "Auto-generated reference documentation for {RULE_NAME}",
    description: Optional[str] = textwrap.dedent(
        """\
    This is an automatically generated reference documentation for {RULE_NAME}. It is generated
    from input provided by {PLUGIN_NAME} via the debputy API.

    (If you are the provider of the {PLUGIN_NAME} plugin, you can replace this text with
    your own documentation by providing the `inline_reference_documentation` when registering
    the manifest rule.)
    """
    ),
    attributes: Optional[Sequence[ParserAttributeDocumentation]] = None,
    non_mapping_description: Optional[str] = None,
    reference_documentation_url: Optional[str] = None,
) -> ParserDocumentation:
    """Build the inline reference documentation for a manifest snippet

    Parameters described as "Python format strings" may use these variables:

     * RULE_NAME: Name of the rule. If the manifest snippet has aliases, this will be
       the name of the alias provided by the user.
     * MANIFEST_FORMAT_DOC: Path OR URL to the "MANIFEST-FORMAT" reference documentation
       from `debputy`. Using this variable ensures that you point to the file that
       matches the version of `debputy` itself.
     * PLUGIN_NAME: Name of the plugin providing this rule.

    :param title: The title the user should see for the rule. A placeholder is used by
      default. This parameter can be a Python format string with the variables above.
    :param description: The description the user should see for the rule. The default is
      an auto-generated placeholder stating that the text was generated from plugin
      input. This parameter can be a Python format string with the variables above.
    :param attributes: A sequence of attribute-related documentation. Each element should
      be the result of `documented_attr` or `undocumented_attr`, and the sequence must
      cover all source attributes exactly once.
    :param non_mapping_description: The description the user should see when `debputy`
      describes the non-mapping format of the rule. Must not be provided for rules that
      do not have an (optional) non-mapping format as source format. This parameter can
      be a Python format string with the variables above.
    :param reference_documentation_url: A URL to the reference documentation.
    :return: An opaque representation of the documentation.
    """
    return ParserDocumentation(
        title=title,
        description=description,
        attribute_doc=attributes,
        alt_parser_description=non_mapping_description,
        documentation_reference_url=reference_documentation_url,
    )
@property
def on_upgrade(self) -> ServiceUpgradeRule:
    """The rule for handling the service when the package is upgraded

    The possible rules are:
     * `do-nothing`: The plugin should not interact with the running service (if any).
       (Maintenance of the enabled state, start on install, etc. are still applicable.)
     * `reload`: The plugin should attempt to reload the running service (if any).
       Note: In combination with `auto_start_in_install == False`, be careful to not
       start the service if it is not already running.
     * `restart`: The plugin should attempt to restart the running service (if any).
       Note: In combination with `auto_start_in_install == False`, be careful to not
       start the service if it is not already running.
     * `stop-then-start`: The plugin should stop the service during `prerm upgrade`
       and start it again in the `postinst` script.

    Note: Regardless of the rule, the plugin should still consider what to do in
    `prerm remove`. That is the last point in time where the plugin can rely on the
    service definitions in the file system to stop the services when the package is
    being uninstalled.

    :return: The service restart rule
    """
    # Abstract in this API definition; the implementation class provides the value.
    raise NotImplementedError
+ """ + raise NotImplementedError diff --git a/src/debputy/plugin/api/test_api/__init__.py b/src/debputy/plugin/api/test_api/__init__.py new file mode 100644 index 0000000..414e6c1 --- /dev/null +++ b/src/debputy/plugin/api/test_api/__init__.py @@ -0,0 +1,21 @@ +from debputy.plugin.api.test_api.test_impl import ( + package_metadata_context, + initialize_plugin_under_test, + manifest_variable_resolution_context, +) +from debputy.plugin.api.test_api.test_spec import ( + RegisteredPackagerProvidedFile, + build_virtual_file_system, + InitializedPluginUnderTest, + DEBPUTY_TEST_AGAINST_INSTALLED_PLUGINS, +) + +__all__ = [ + "initialize_plugin_under_test", + "RegisteredPackagerProvidedFile", + "build_virtual_file_system", + "InitializedPluginUnderTest", + "package_metadata_context", + "manifest_variable_resolution_context", + "DEBPUTY_TEST_AGAINST_INSTALLED_PLUGINS", +] diff --git a/src/debputy/plugin/api/test_api/test_impl.py b/src/debputy/plugin/api/test_api/test_impl.py new file mode 100644 index 0000000..46f054c --- /dev/null +++ b/src/debputy/plugin/api/test_api/test_impl.py @@ -0,0 +1,803 @@ +import contextlib +import dataclasses +import inspect +import os.path +from io import BytesIO +from typing import ( + Mapping, + Dict, + Optional, + Tuple, + List, + cast, + FrozenSet, + Sequence, + Union, + Type, + Iterator, + Set, + KeysView, + Callable, +) + +from debian.deb822 import Deb822 +from debian.substvars import Substvars + +from debputy import DEBPUTY_PLUGIN_ROOT_DIR +from debputy.architecture_support import faked_arch_table +from debputy.filesystem_scan import FSROOverlay, FSRootDir +from debputy.packages import BinaryPackage +from debputy.plugin.api import ( + PluginInitializationEntryPoint, + VirtualPath, + PackageProcessingContext, + DpkgTriggerType, + Maintscript, +) +from debputy.plugin.api.example_processing import process_discard_rule_example +from debputy.plugin.api.impl import ( + plugin_metadata_for_debputys_own_plugin, + 
@dataclasses.dataclass(frozen=True, slots=True)
class PackageProcessingContextTestProvider(PackageProcessingContext):
    """Frozen dataclass implementation of `PackageProcessingContext` for tests

    Instances are created by `package_metadata_context`, which passes every
    field as a keyword argument.
    """

    # The package being processed in the test.
    binary_package: BinaryPackage
    # Version string associated with `binary_package`.
    binary_package_version: str
    # The related udeb package, or None when the test did not define one.
    related_udeb_package: Optional[BinaryPackage]
    # Version string of the related udeb package (None when not set).
    related_udeb_package_version: Optional[str]
    # Zero-argument callable returning the (package, file system root) pairs
    # that should be accessible from `binary_package`.
    accessible_package_roots: Callable[[], Sequence[Tuple[BinaryPackage, VirtualPath]]]
def _auto_load_plugin_from_filename(
    py_test_filename: str,
) -> "InitializedPluginUnderTest":
    """Load the plugin that belongs to the given test file

    The plugin name is derived from the basename of the test file: whatever
    `PLUGIN_TEST_SUFFIX` matches is removed and every `_` is replaced by `-`.

    Where the plugin is loaded from is controlled by the environment variable
    `DEBPUTY_TEST_PLUGIN_LOCATION`:

     * `uninstalled` (the default when unset): The JSON descriptor is expected next to
       the test file as `<plugin-name>.json` or, failing that, `<plugin-name>.json.in`.
     * `installed`: The plugin is looked up by name below `DEBPUTY_PLUGIN_ROOT_DIR`.

    :param py_test_filename: Path of the Python test file that requested the plugin.
    :return: The loaded plugin for testing.
    :raises FileNotFoundError: In `uninstalled` mode, when neither descriptor exists.
    :raises ValueError: When `DEBPUTY_TEST_PLUGIN_LOCATION` has an unsupported value.
    """
    dirname, basename = os.path.split(py_test_filename)
    plugin_name = PLUGIN_TEST_SUFFIX.sub("", basename).replace("_", "-")

    test_location = os.environ.get("DEBPUTY_TEST_PLUGIN_LOCATION", "uninstalled")
    if test_location == "uninstalled":
        json_basename = f"{plugin_name}.json"
        json_desc_file = os.path.join(dirname, json_basename)
        if "/" not in json_desc_file:
            # A bare basename (empty dirname) is turned into an explicit relative path.
            json_desc_file = f"./{json_desc_file}"

        if os.path.isfile(json_desc_file):
            return _initialize_plugin_from_desc(json_desc_file)

        json_desc_file_in = f"{json_desc_file}.in"
        if os.path.isfile(json_desc_file_in):
            # Load the `.json.in` file that was just found. Passing `json_desc_file`
            # here would point at a file that is known not to exist.
            return _initialize_plugin_from_desc(json_desc_file_in)
        raise FileNotFoundError(
            f"Cannot determine the plugin JSON metadata descriptor: Expected it to be"
            f" {json_desc_file} or {json_desc_file_in}"
        )

    if test_location == "installed":
        plugin_metadata = find_json_plugin([str(DEBPUTY_PLUGIN_ROOT_DIR)], plugin_name)
        return _initialize_plugin_under_test(plugin_metadata, load_debputy_plugin=True)

    raise ValueError(
        'Invalid or unsupported "DEBPUTY_TEST_PLUGIN_LOCATION" environment variable. It must be either'
        ' unset OR one of "installed", "uninstalled".'
    )
def _initialize_plugin_from_desc(
    desc_file: str,
) -> "InitializedPluginUnderTest":
    """Load and initialize the plugin described by a JSON plugin descriptor

    :param desc_file: Path to the descriptor. The name must end with `.json`
      or `.json.in`.
    :return: The loaded plugin for testing (debputy's own plugin is loaded as well).
    :raises ValueError: If `desc_file` does not have one of the accepted suffixes.
    """
    accepted_suffixes = (".json", ".json.in")
    if desc_file.endswith(accepted_suffixes):
        return _initialize_plugin_under_test(
            parse_json_plugin_desc(desc_file),
            load_debputy_plugin=True,
        )
    raise ValueError("The plugin file must end with .json or .json.in")
def package_metadata_context(
    *,
    host_arch: str = "amd64",
    package_fields: Optional[Dict[str, str]] = None,
    related_udeb_package_fields: Optional[Dict[str, str]] = None,
    binary_package_version: str = "1.0-1",
    related_udeb_package_version: Optional[str] = None,
    should_be_acted_on: bool = True,
    related_udeb_fs_root: Optional[VirtualPath] = None,
    accessible_package_roots: Sequence[Tuple[Mapping[str, str], VirtualPath]] = tuple(),
) -> PackageProcessingContext:
    """Create a `PackageProcessingContext` for use in plugin tests

    :param host_arch: The host architecture used for the faked architecture table.
    :param package_fields: Fields for the main binary package. They are merged on top of
      the defaults `Package: foo` and `Architecture: any`.
    :param related_udeb_package_fields: When given, a related udeb package is defined.
      Missing fields default to `Package: <main package>-udeb`, the `Architecture` of
      the main package and `Package-Type: udeb`.
    :param binary_package_version: Version of the main binary package.
    :param related_udeb_package_version: Version of the related udeb. When a udeb is
      defined and this is omitted, `binary_package_version` is used.
    :param should_be_acted_on: Passed on to the main binary package.
    :param related_udeb_fs_root: File system root of the related udeb. When given, the
      udeb is added to the accessible package roots; this requires that
      `related_udeb_package_fields` is given as well.
    :param accessible_package_roots: Pairs of (package fields, file system root) for
      additional packages that should be accessible. Each member must provide the
      `Package` and `Architecture` fields.
    :return: The context for the main binary package.
    :raises ValueError: If a member of `accessible_package_roots` lacks a mandatory
      field, if a package would not be accessible from the main package, or if
      `related_udeb_fs_root` is given without `related_udeb_package_fields`.
    """
    process_table = faked_arch_table(host_arch)
    f = {
        "Package": "foo",
        "Architecture": "any",
    }
    if package_fields is not None:
        f.update(package_fields)

    bin_package = BinaryPackage(
        Deb822(f),
        process_table,
        FAKE_DPKG_QUERY_TABLE,
        is_main_package=True,
        should_be_acted_on=should_be_acted_on,
    )
    udeb_package = None
    if related_udeb_package_fields is not None:
        uf = dict(related_udeb_package_fields)
        uf.setdefault("Package", f'{f["Package"]}-udeb')
        uf.setdefault("Architecture", f["Architecture"])
        uf.setdefault("Package-Type", "udeb")
        udeb_package = BinaryPackage(
            Deb822(uf),
            process_table,
            FAKE_DPKG_QUERY_TABLE,
            is_main_package=False,
            should_be_acted_on=True,
        )
        if related_udeb_package_version is None:
            related_udeb_package_version = binary_package_version

    apr: List[Tuple[BinaryPackage, VirtualPath]] = []
    for fields, apr_fs_root in accessible_package_roots:
        apr_fields = Deb822(dict(fields))
        if "Package" not in apr_fields:
            raise ValueError(
                "Missing mandatory Package field in member of accessible_package_roots"
            )
        if "Architecture" not in apr_fields:
            raise ValueError(
                "Missing mandatory Architecture field in member of accessible_package_roots"
            )
        apr_package = BinaryPackage(
            apr_fields,
            process_table,
            FAKE_DPKG_QUERY_TABLE,
            is_main_package=False,
            should_be_acted_on=True,
        )
        r = package_cross_check_precheck(bin_package, apr_package)
        if not r[0]:
            raise ValueError(
                f"{apr_package.name} would not be accessible for {bin_package.name}"
            )
        apr.append((apr_package, apr_fs_root))

    # Handle the udeb root on its own, so it is not silently dropped when no other
    # accessible package roots were requested.
    if related_udeb_fs_root is not None:
        if udeb_package is None:
            raise ValueError(
                "related_udeb_package_fields must be given when related_udeb_fs_root is given"
            )
        r = package_cross_check_precheck(bin_package, udeb_package)
        if not r[0]:
            raise ValueError(
                f"{udeb_package.name} would not be accessible for {bin_package.name}, so providing"
                " related_udeb_fs_root is irrelevant"
            )
        # Every member must be a (package, fs root) pair, like the entries added above.
        apr.append((udeb_package, related_udeb_fs_root))

    accessible_roots = tuple(apr)

    return PackageProcessingContextTestProvider(
        binary_package=bin_package,
        related_udeb_package=udeb_package,
        binary_package_version=binary_package_version,
        related_udeb_package_version=related_udeb_package_version,
        accessible_package_roots=lambda: accessible_roots,
    )
manifest_variable_resolution_context( + *, + debian_dir: Optional[VirtualPath] = None, +) -> VariableContext: + if debian_dir is None: + debian_dir = FSRootDir() + + return VariableContext(debian_dir) + + +class MaintscriptAccessorTestProvider(MaintscriptAccessorProviderBase): + __slots__ = ("_plugin_metadata", "_plugin_source_id", "_maintscript_container") + + def __init__( + self, + plugin_metadata: DebputyPluginMetadata, + plugin_source_id: str, + maintscript_container: Dict[str, List[RegisteredMaintscript]], + ): + self._plugin_metadata = plugin_metadata + self._plugin_source_id = plugin_source_id + self._maintscript_container = maintscript_container + + @classmethod + def _apply_condition_to_script( + cls, condition: str, run_snippet: str, /, indent: Optional[bool] = None + ) -> str: + return run_snippet + + def _append_script( + self, + caller_name: str, + maintscript: Maintscript, + full_script: str, + /, + perform_substitution: bool = True, + ) -> None: + if self._plugin_source_id not in self._maintscript_container: + self._maintscript_container[self._plugin_source_id] = [] + self._maintscript_container[self._plugin_source_id].append( + RegisteredMaintscript( + maintscript, + caller_name, + full_script, + perform_substitution, + ) + ) + + +class RegisteredMetadataImpl(RegisteredMetadata): + __slots__ = ( + "_substvars", + "_triggers", + "_maintscripts", + ) + + def __init__( + self, + substvars: Substvars, + triggers: List[RegisteredTrigger], + maintscripts: List[RegisteredMaintscript], + ) -> None: + self._substvars = substvars + self._triggers = triggers + self._maintscripts = maintscripts + + @property + def substvars(self) -> Substvars: + return self._substvars + + @property + def triggers(self) -> List[RegisteredTrigger]: + return self._triggers + + def maintscripts( + self, + *, + maintscript: Optional[Maintscript] = None, + ) -> List[RegisteredMaintscript]: + if maintscript is None: + return self._maintscripts + return [m for m in self._maintscripts 
if m.maintscript == maintscript] + + +class BinaryCtrlAccessorTestProvider(BinaryCtrlAccessorProviderBase): + __slots__ = ("_maintscript_container",) + + def __init__( + self, + plugin_metadata: DebputyPluginMetadata, + plugin_source_id: str, + context: PackageProcessingContext, + ) -> None: + super().__init__( + plugin_metadata, + plugin_source_id, + context, + {}, + FlushableSubstvars(), + (None, None), + ) + self._maintscript_container: Dict[str, List[RegisteredMaintscript]] = {} + + def _create_maintscript_accessor(self) -> MaintscriptAccessor: + return MaintscriptAccessorTestProvider( + self._plugin_metadata, + self._plugin_source_id, + self._maintscript_container, + ) + + def registered_metadata(self) -> RegisteredMetadata: + return RegisteredMetadataImpl( + self._substvars, + [ + RegisteredTrigger.from_plugin_provided_trigger(t) + for t in self._triggers.values() + if t.provider_source_id == self._plugin_source_id + ], + self._maintscript_container.get(self._plugin_source_id, []), + ) + + +class ServiceRegistryTestImpl(ServiceRegistry[DSD]): + __slots__ = ("_service_manager_details", "_service_definitions") + + def __init__( + self, + service_manager_details: ServiceManagerDetails, + detected_services: List[DetectedService[DSD]], + ) -> None: + self._service_manager_details = service_manager_details + self._service_definitions = detected_services + + def register_service( + self, + path: VirtualPath, + name: Union[str, List[str]], + *, + type_of_service: str = "service", # "timer", etc. 
+ service_scope: str = "system", + enable_by_default: bool = True, + start_by_default: bool = True, + default_upgrade_rule: ServiceUpgradeRule = "restart", + service_context: Optional[DSD] = None, + ) -> None: + names = name if isinstance(name, list) else [name] + if len(names) < 1: + raise ValueError( + f"The service must have at least one name - {path.absolute} did not have any" + ) + self._service_definitions.append( + DetectedService( + path, + names, + type_of_service, + service_scope, + enable_by_default, + start_by_default, + default_upgrade_rule, + service_context, + ) + ) + + +@contextlib.contextmanager +def _read_only_fs_root(fs_root: VirtualPath) -> Iterator[VirtualPath]: + if fs_root.is_read_write: + assert isinstance(fs_root, FSRootDir) + fs_root.is_read_write = False + yield fs_root + fs_root.is_read_write = True + else: + yield fs_root + + +class InitializedPluginUnderTestImpl(InitializedPluginUnderTest): + def __init__( + self, + plugin_name: str, + feature_set: PluginProvidedFeatureSet, + substitution: SubstitutionImpl, + ) -> None: + self._feature_set = feature_set + self._plugin_name = plugin_name + self._packager_provided_files: Optional[ + Dict[str, RegisteredPackagerProvidedFile] + ] = None + self._triggers: Dict[Tuple[DpkgTriggerType, str], PluginProvidedTrigger] = {} + self._maintscript_container: Dict[str, List[RegisteredMaintscript]] = {} + self._substitution = substitution + assert plugin_name in self._feature_set.plugin_data + + @property + def _plugin_metadata(self) -> DebputyPluginMetadata: + return self._feature_set.plugin_data[self._plugin_name] + + def packager_provided_files_by_stem( + self, + ) -> Mapping[str, RegisteredPackagerProvidedFile]: + ppf = self._packager_provided_files + if ppf is None: + result: Dict[str, RegisteredPackagerProvidedFile] = {} + for spec in self._feature_set.packager_provided_files.values(): + if spec.debputy_plugin_metadata.plugin_name != self._plugin_name: + continue + # Registered as a virtual 
subclass, so this should always be True + assert isinstance(spec, RegisteredPackagerProvidedFile) + result[spec.stem] = spec + self._packager_provided_files = result + ppf = result + return ppf + + def run_metadata_detector( + self, + metadata_detector_id: str, + fs_root: VirtualPath, + context: Optional[PackageProcessingContext] = None, + ) -> RegisteredMetadata: + if fs_root.parent_dir is not None: + raise ValueError("Provided path must be the file system root.") + detectors = self._feature_set.metadata_maintscript_detectors[self._plugin_name] + matching_detectors = [ + d for d in detectors if d.detector_id == metadata_detector_id + ] + if len(matching_detectors) != 1: + assert not matching_detectors + raise ValueError( + f"The plugin {self._plugin_name} did not provide a metadata detector with ID" + f' "{metadata_detector_id}"' + ) + if context is None: + context = package_metadata_context() + detector = matching_detectors[0] + if not detector.applies_to(context.binary_package): + raise ValueError( + f'The detector "{metadata_detector_id}" from {self._plugin_name} does not apply to the' + " given package. Consider using `package_metadata_context()` to emulate a binary package" + " with the correct specification. As an example: " + '`package_metadata_context(package_fields={"Package-Type": "udeb"})` would emulate a udeb' + " package." 
+ ) + + ctrl = BinaryCtrlAccessorTestProvider( + self._plugin_metadata, + metadata_detector_id, + context, + ) + with _read_only_fs_root(fs_root) as ro_root: + detector.run_detector( + ro_root, + ctrl, + context, + ) + return ctrl.registered_metadata() + + def run_package_processor( + self, + package_processor_id: str, + fs_root: VirtualPath, + context: Optional[PackageProcessingContext] = None, + ) -> None: + if fs_root.parent_dir is not None: + raise ValueError("Provided path must be the file system root.") + pp_key = (self._plugin_name, package_processor_id) + package_processor = self._feature_set.all_package_processors.get(pp_key) + if package_processor is None: + raise ValueError( + f"The plugin {self._plugin_name} did not provide a package processor with ID" + f' "{package_processor_id}"' + ) + if context is None: + context = package_metadata_context() + if not fs_root.is_read_write: + raise ValueError( + "The provided fs_root is read-only and it must be read-write for package processor" + ) + if not package_processor.applies_to(context.binary_package): + raise ValueError( + f'The package processor "{package_processor_id}" from {self._plugin_name} does not apply' + " to the given package. Consider using `package_metadata_context()` to emulate a binary" + " package with the correct specification. As an example: " + '`package_metadata_context(package_fields={"Package-Type": "udeb"})` would emulate a udeb' + " package." 
+ ) + package_processor.run_package_processor( + fs_root, + None, + context, + ) + + @property + def declared_manifest_variables(self) -> FrozenSet[str]: + return frozenset( + { + k + for k, v in self._feature_set.manifest_variables.items() + if v.plugin_metadata.plugin_name == self._plugin_name + } + ) + + def automatic_discard_rules_examples_with_issues(self) -> Sequence[ADRExampleIssue]: + issues = [] + for adr in self._feature_set.auto_discard_rules.values(): + if adr.plugin_metadata.plugin_name != self._plugin_name: + continue + for idx, example in enumerate(adr.examples): + result = process_discard_rule_example( + adr, + example, + ) + if result.inconsistent_paths: + issues.append( + ADRExampleIssue( + adr.name, + idx, + [ + x.absolute + ("/" if x.is_dir else "") + for x in result.inconsistent_paths + ], + ) + ) + return issues + + def run_service_detection_and_integrations( + self, + service_manager: str, + fs_root: VirtualPath, + context: Optional[PackageProcessingContext] = None, + *, + service_context_type_hint: Optional[Type[DSD]] = None, + ) -> Tuple[List[DetectedService[DSD]], RegisteredMetadata]: + if fs_root.parent_dir is not None: + raise ValueError("Provided path must be the file system root.") + try: + service_manager_details = self._feature_set.service_managers[ + service_manager + ] + if service_manager_details.plugin_metadata.plugin_name != self._plugin_name: + raise KeyError(service_manager) + except KeyError: + raise ValueError( + f"The plugin {self._plugin_name} does not provide a" + f" service manager called {service_manager}" + ) from None + + if context is None: + context = package_metadata_context() + detected_services: List[DetectedService[DSD]] = [] + registry = ServiceRegistryTestImpl(service_manager_details, detected_services) + service_manager_details.service_detector( + fs_root, + registry, + context, + ) + ctrl = BinaryCtrlAccessorTestProvider( + self._plugin_metadata, + service_manager_details.service_manager, + context, + ) + if 
detected_services: + service_definitions = [ + ServiceDefinitionImpl( + ds.names[0], + ds.names, + ds.path, + ds.type_of_service, + ds.service_scope, + ds.enable_by_default, + ds.start_by_default, + ds.default_upgrade_rule, + self._plugin_name, + True, + ds.service_context, + ) + for ds in detected_services + ] + service_manager_details.service_integrator( + service_definitions, + ctrl, + context, + ) + return detected_services, ctrl.registered_metadata() + + def manifest_variables( + self, + *, + resolution_context: Optional[VariableContext] = None, + mocked_variables: Optional[Mapping[str, str]] = None, + ) -> Mapping[str, str]: + valid_manifest_variables = frozenset( + { + n + for n, v in self._feature_set.manifest_variables.items() + if v.plugin_metadata.plugin_name == self._plugin_name + } + ) + if resolution_context is None: + resolution_context = manifest_variable_resolution_context() + substitution = self._substitution.copy_for_subst_test( + self._feature_set, + resolution_context, + extra_substitutions=mocked_variables, + ) + return SubstitutionTable( + valid_manifest_variables, + substitution, + ) + + +class SubstitutionTable(Mapping[str, str]): + def __init__( + self, valid_manifest_variables: FrozenSet[str], substitution: Substitution + ) -> None: + self._valid_manifest_variables = valid_manifest_variables + self._resolved: Set[str] = set() + self._substitution = substitution + + def __contains__(self, item: object) -> bool: + return item in self._valid_manifest_variables + + def __getitem__(self, key: str) -> str: + if key not in self._valid_manifest_variables: + raise KeyError(key) + v = self._substitution.substitute( + "{{" + key + "}}", f"test of manifest variable `{key}`" + ) + self._resolved.add(key) + return v + + def __len__(self) -> int: + return len(self._valid_manifest_variables) + + def __iter__(self) -> Iterator[str]: + return iter(self._valid_manifest_variables) + + def keys(self) -> KeysView[str]: + return cast("KeysView[str]", 
self._valid_manifest_variables) diff --git a/src/debputy/plugin/api/test_api/test_spec.py b/src/debputy/plugin/api/test_api/test_spec.py new file mode 100644 index 0000000..b05f7ed --- /dev/null +++ b/src/debputy/plugin/api/test_api/test_spec.py @@ -0,0 +1,364 @@ +import dataclasses +import os +from abc import ABCMeta +from typing import ( + Iterable, + Mapping, + Callable, + Optional, + Union, + List, + Tuple, + Set, + Sequence, + Generic, + Type, + Self, + FrozenSet, +) + +from debian.substvars import Substvars + +from debputy import filesystem_scan +from debputy.plugin.api import ( + VirtualPath, + PackageProcessingContext, + DpkgTriggerType, + Maintscript, +) +from debputy.plugin.api.impl_types import PluginProvidedTrigger +from debputy.plugin.api.spec import DSD, ServiceUpgradeRule, PathDef +from debputy.substitution import VariableContext + +DEBPUTY_TEST_AGAINST_INSTALLED_PLUGINS = ( + os.environ.get("DEBPUTY_TEST_PLUGIN_LOCATION", "uninstalled") == "installed" +) + + +@dataclasses.dataclass(slots=True, frozen=True) +class ADRExampleIssue: + name: str + example_index: int + inconsistent_paths: Sequence[str] + + +def build_virtual_file_system( + paths: Iterable[Union[str, PathDef]], + read_write_fs: bool = True, +) -> VirtualPath: + """Create a pure-virtual file system for use with metadata detectors + + This method will generate a virtual file system a list of path names or virtual path definitions. It will + also insert any implicit path required to make the file system connected. As an example: + + >>> fs_root = build_virtual_file_system(['./usr/share/doc/package/copyright']) + >>> # The file we explicitly requested is obviously there + >>> fs_root.lookup('./usr/share/doc/package/copyright') is not None + True + >>> # but so is every directory up to that point + >>> all(fs_root.lookup(d).is_dir + ... for d in ['./usr', './usr/share', './usr/share/doc', './usr/share/doc/package'] + ... 
) + True + + Any string provided will be passed to `virtual_path_def` using all defaults for other parameters, making `str` + arguments a nice easy shorthand if you just want a path to exist, but do not really care about it otherwise + (or `virtual_path_def` defaults happen to work for you). + + Here is a very small example of how to create some basic file system objects to get you started: + + >>> from debputy.plugin.api import virtual_path_def + >>> path_defs = [ + ... './usr/share/doc/', # Create a directory + ... virtual_path_def("./bin/zcat", link_target="/bin/gzip"), # Create a symlink + ... virtual_path_def("./bin/gzip", mode=0o755), # Create a file (with a custom mode) + ... ] + >>> fs_root = build_virtual_file_system(path_defs) + >>> fs_root.lookup('./usr/share/doc').is_dir + True + >>> fs_root.lookup('./bin/zcat').is_symlink + True + >>> fs_root.lookup('./bin/zcat').readlink() == '/bin/gzip' + True + >>> fs_root.lookup('./bin/gzip').is_file + True + >>> fs_root.lookup('./bin/gzip').mode == 0o755 + True + + :param paths: An iterable of any mix of path names (str) and virtual_path_def definitions + (results from `virtual_path_def`). + :param read_write_fs: Whether the file system is read-write (True) or read-only (False). + Note that this is the default permission; the plugin test API may temporarily turn a + read-write file system read-only (when running a metadata detector, etc.).
+ :return: The root of the generated file system + """ + return filesystem_scan.build_virtual_fs(paths, read_write_fs=read_write_fs) + + +@dataclasses.dataclass(slots=True, frozen=True) +class RegisteredTrigger: + dpkg_trigger_type: DpkgTriggerType + dpkg_trigger_target: str + + def serialized_format(self) -> str: + """The semantic contents of the DEBIAN/triggers file""" + return f"{self.dpkg_trigger_type} {self.dpkg_trigger_target}" + + @classmethod + def from_plugin_provided_trigger( + cls, + plugin_provided_trigger: PluginProvidedTrigger, + ) -> "Self": + return cls( + plugin_provided_trigger.dpkg_trigger_type, + plugin_provided_trigger.dpkg_trigger_target, + ) + + +@dataclasses.dataclass(slots=True, frozen=True) +class RegisteredMaintscript: + """Details about a maintscript registered by a plugin""" + + """Which maintscript is applies to (e.g., "postinst")""" + maintscript: Maintscript + """Which method was used to trigger the script (e.g., "on_configure")""" + registration_method: str + """The snippet provided by the plugin as it was provided + + That is, no indentation/conditions/substitutions have been applied to this text + """ + plugin_provided_script: str + """Whether substitutions would have been applied in a production run""" + requested_substitution: bool + + +@dataclasses.dataclass(slots=True, frozen=True) +class DetectedService(Generic[DSD]): + path: VirtualPath + names: Sequence[str] + type_of_service: str + service_scope: str + enable_by_default: bool + start_by_default: bool + default_upgrade_rule: ServiceUpgradeRule + service_context: Optional[DSD] + + +class RegisteredPackagerProvidedFile(metaclass=ABCMeta): + """Record of a registered packager provided file - No instantiation + + New "mandatory" attributes may be added in minor versions, which means instantiation will break tests. + Plugin providers should therefore not create instances of this dataclass. It is visible only to aid + test writing by providing type-safety / auto-completion. 
+ """ + + """The name stem used for generating the file""" + stem: str + """The recorded directory these files should be installed into""" + installed_path: str + """The mode that debputy will give these files when installed (unless overridden)""" + default_mode: int + """The default priority assigned to files unless overridden (if priorities are assigned at all)""" + default_priority: Optional[int] + """The filename format to be used""" + filename_format: Optional[str] + """The formatting correcting callback""" + post_formatting_rewrite: Optional[Callable[[str], str]] + + def compute_dest( + self, + assigned_name: str, + *, + assigned_priority: Optional[int] = None, + owning_package: Optional[str] = None, + path: Optional[VirtualPath] = None, + ) -> Tuple[str, str]: + """Determine the basename of this packager provided file + + This method is useful for verifying that the `installed_path` and `post_formatting_rewrite` work + as intended. As an example, some programs do not support "." in their configuration files, so you might + have a post_formatting_rewrite à la `lambda x: x.replace(".", "_")`. Then you can test it by + calling `assert rppf.compute_dest("python3.11")[1] == "python3_11"` to verify that if a package like + `python3.11` were to use this packager provided file, it would still generate a supported file name. + + The `assigned_name` parameter is normally derived from the filename. Examples for + how to derive it: + + * `debian/my-pkg.stem` => `my-pkg` + * `debian/my-pkg.my-custom-name.stem` => `my-custom-name` + + Note that all parts (`my-pkg`, `my-custom-name` and `stem`) can contain periods (".") despite + also being a delimiter. Additionally, `my-custom-name` is not restricted to being a valid package + name, so it can have any file-system valid character in it.
+ + For the 0.01% case, where the plugin is using *both* `{name}` *and* `{owning_package}` in the + installed_path, then you can separately *also* set the `owning_package` parameter. However, by + default the `assigned_name` is used for both when `owning_package` is not provided. + + :param assigned_name: The name assigned. Usually this is the name of the package containing the file. + :param assigned_priority: Optionally a priority override for the file (if priority is supported). Must be + omitted/None if priorities are not supported. + :param owning_package: Optionally the name of the owning package. It is only needed for those exceedingly + rare cases where the `installed_path` contains `{owning_package}` (usually in addition to `{name}`). + :param path: Special-case param, only needed for when testing a special `debputy` PPF. + :return: A tuple of the directory name and the basename (in that order) that combined make up the path + that debputy would use. + """ + raise NotImplementedError + + +class RegisteredMetadata: + __slots__ = () + + @property + def substvars(self) -> Substvars: + """Returns the Substvars + + :return: The substvars in their current state. + """ + raise NotImplementedError + + @property + def triggers(self) -> List[RegisteredTrigger]: + raise NotImplementedError + + def maintscripts( + self, + *, + maintscript: Optional[Maintscript] = None, + ) -> List[RegisteredMaintscript]: + """Extract the maintscripts provided by the given metadata detector + + :param maintscript: If provided, only snippets registered for the given maintscript are returned. Can be + used to say "Give me all the 'postinst' snippets by this metadata detector", which can simplify + verification in some cases. + :return: A list of all matching maintscripts registered by the metadata detector. If the detector has + not been run, then the list will be empty. If the metadata detector has been run multiple times, + then this is the aggregation of all the runs.
+ """ + raise NotImplementedError + + +class InitializedPluginUnderTest: + def packager_provided_files(self) -> Iterable[RegisteredPackagerProvidedFile]: + """An iterable of all packager provided files registered by the plugin under test + + If you want a particular order, please sort the result. + """ + return self.packager_provided_files_by_stem().values() + + def packager_provided_files_by_stem( + self, + ) -> Mapping[str, RegisteredPackagerProvidedFile]: + """All packager provided files registered by the plugin under test grouped by name stem""" + raise NotImplementedError + + def run_metadata_detector( + self, + metadata_detector_id: str, + fs_root: VirtualPath, + context: Optional[PackageProcessingContext] = None, + ) -> RegisteredMetadata: + """Run a metadata detector (by its ID) against a given file system + + :param metadata_detector_id: The ID of the metadata detector to run + :param fs_root: The file system the metadata detector should see (must be the root of the file system) + :param context: The context the metadata detector should see. If not provided, a mock will be + provided to the extent possible. + :return: The metadata registered by the metadata detector + """ + raise NotImplementedError + + def run_package_processor( + self, + package_processor_id: str, + fs_root: VirtualPath, + context: Optional[PackageProcessingContext] = None, + ) -> None: + """Run a package processor (by its ID) against a given file system + + Note: Dependency processors are *not* run first. + + :param package_processor_id: The ID of the package processor to run + :param fs_root: The file system the package processor should see (must be the root of the file system) + :param context: The context the package processor should see. If not provided, a mock will be + provided to the extent possible.
+ """ + raise NotImplementedError + + @property + def declared_manifest_variables(self) -> Union[Set[str], FrozenSet[str]]: + """Extract the manifest variables declared by the plugin + + :return: All manifest variables declared by the plugin + """ + raise NotImplementedError + + def automatic_discard_rules_examples_with_issues(self) -> Sequence[ADRExampleIssue]: + """Validate examples of the automatic discard rules + + For any failed example, use `debputy plugin show automatic-discard-rules <name>` to see + the failed example in full. + + :return: If any examples have issues, this will return a non-empty sequence with an + entry for each issue. + """ + raise NotImplementedError + + def run_service_detection_and_integrations( + self, + service_manager: str, + fs_root: VirtualPath, + context: Optional[PackageProcessingContext] = None, + *, + service_context_type_hint: Optional[Type[DSD]] = None, + ) -> Tuple[List[DetectedService[DSD]], RegisteredMetadata]: + """Run the service manager's detection logic and return the results + + This method can be used to validate the service detection and integration logic of a plugin + for a given service manager. + + First the service detector is run and if it finds any services, the integrator code is then + run on those services with their default values. + + :param service_manager: The name of the service manager as provided during the initialization + :param fs_root: The file system the service detector should see (must be the root of + the file system) + :param context: The context the service detector should see. If not provided, a mock + will be provided to the extent possible. + :param service_context_type_hint: Unused; but can be used as a type hint for `mypy` (etc.) + to align the return type. + :return: A tuple of the list of all detected services in the provided file system and the + metadata generated by the integrator (if any services were detected).
+ """ + raise NotImplementedError + + def manifest_variables( + self, + *, + resolution_context: Optional[VariableContext] = None, + mocked_variables: Optional[Mapping[str, str]] = None, + ) -> Mapping[str, str]: + """Provide a table of the manifest variables registered by the plugin + + Each key is a manifest variable and the value of said key is the value of the manifest + variable. Lazy loaded variables are resolved when accessed for the first time and may + raise exceptions if the preconditions are not correct. + + Note this method can be called multiple times with different parameters to provide + different contexts. Lazy loaded variables are resolved at most once per context. + + :param resolution_context: An optional context for lazy loaded manifest variables. + Create an instance of it via `manifest_variable_resolution_context`. + :param mocked_variables: An optional mapping that provides values for certain manifest + variables. This can be used if you want a certain variable to have a certain value + for the test to be stable (or because the manifest variable you are mocking is from + another plugin, and you do not want to deal with the implementation details of how + it is set). Any variable that depends on the mocked variable will use the mocked + variable in the given context. + :return: A table of the manifest variables provided by the plugin. Note this table + only contains manifest variables registered by the plugin. Attempting to resolve + other variables (directly), such as mocked variables or from other plugins, will + trigger a `KeyError`. 
+ """ + raise NotImplementedError diff --git a/src/debputy/plugin/debputy/__init__.py b/src/debputy/plugin/debputy/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/src/debputy/plugin/debputy/__init__.py diff --git a/src/debputy/plugin/debputy/binary_package_rules.py b/src/debputy/plugin/debputy/binary_package_rules.py new file mode 100644 index 0000000..04a0fa1 --- /dev/null +++ b/src/debputy/plugin/debputy/binary_package_rules.py @@ -0,0 +1,491 @@ +import os +import textwrap +from typing import ( + Any, + List, + NotRequired, + Union, + Literal, + TypedDict, + Annotated, + Optional, +) + +from debputy import DEBPUTY_DOC_ROOT_DIR +from debputy.maintscript_snippet import DpkgMaintscriptHelperCommand, MaintscriptSnippet +from debputy.manifest_parser.base_types import ( + DebputyParsedContent, + FileSystemExactMatchRule, +) +from debputy.manifest_parser.declarative_parser import ( + DebputyParseHint, + ParserGenerator, +) +from debputy.manifest_parser.exceptions import ManifestParseException +from debputy.manifest_parser.parser_data import ParserContextData +from debputy.manifest_parser.util import AttributePath +from debputy.path_matcher import MatchRule, MATCH_ANYTHING, ExactFileSystemPath +from debputy.plugin.api import reference_documentation +from debputy.plugin.api.impl import DebputyPluginInitializerProvider +from debputy.plugin.api.impl_types import OPARSER_PACKAGES +from debputy.transformation_rules import TransformationRule + + +ACCEPTABLE_CLEAN_ON_REMOVAL_FOR_GLOBS_AND_EXACT_MATCHES = frozenset( + [ + "./var/log", + ] +) + + +ACCEPTABLE_CLEAN_ON_REMOVAL_IF_EXACT_MATCH_OR_SUBDIR_OF = frozenset( + [ + "./etc", + "./run", + "./var/lib", + "./var/cache", + "./var/backups", + "./var/spool", + # linux-image uses these paths with some `rm -f` + "./usr/lib/modules", + "./lib/modules", + # udev special case + "./lib/udev", + "./usr/lib/udev", + # pciutils deletes /usr/share/misc/pci.ids.<ext> + "./usr/share/misc", + ] +) + + +def 
register_binary_package_rules(api: DebputyPluginInitializerProvider) -> None: + api.plugable_manifest_rule( + OPARSER_PACKAGES, + "binary-version", + BinaryVersionParsedFormat, + _parse_binary_version, + source_format=str, + inline_reference_documentation=reference_documentation( + title="Custom binary version (`binary-version`)", + description=textwrap.dedent( + """\ + In the *rare* case that you need a binary package to have a custom version, you can use + the `binary-version:` key to describe the desired package version. An example being: + + packages: + foo: + # The foo package needs a different epoch because we took it over from a different + # source package with higher epoch version + binary-version: '1:{{DEB_VERSION_UPSTREAM_REVISION}}' + + Use this feature sparingly as it is generally not possible to undo as each version must be + monotonously higher than the previous one. This feature translates into `-v` option for + `dpkg-gencontrol`. + + The value for the `binary-version` key is a string that defines the binary version. Generally, + you will want it to contain one of the versioned related substitution variables such as + `{{DEB_VERSION_UPSTREAM_REVISION}}`. Otherwise, you will have to remember to bump the version + manually with each upload as versions cannot be reused and the package would not support binNMUs + either. + """ + ), + reference_documentation_url=f"{DEBPUTY_DOC_ROOT_DIR}/MANIFEST-FORMAT.md#custom-binary-version-binary-version", + ), + ) + + api.plugable_manifest_rule( + OPARSER_PACKAGES, + "transformations", + ListOfTransformationRulesFormat, + _unpack_list, + source_format=List[TransformationRule], + inline_reference_documentation=reference_documentation( + title="Transformations (`packages.{{PACKAGE}}.transformations`)", + description=textwrap.dedent( + """\ + You can define a `transformations` under the package definition, which is a list a transformation + rules. 
An example: + + packages: + foo: + transformations: + - remove: 'usr/share/doc/{{PACKAGE}}/INSTALL.md' + - move: + source: bar/* + target: foo/ + + + Transformations are ordered and are applied in the listed order. A path can be matched by multiple + transformations; how that plays out depends on which transformations are applied and in which order. + A quick summary: + + - Transformations that modify the file system layout affect how path matches in later transformations. + As an example, `move` and `remove` transformations affects what globs and path matches expand to in + later transformation rules. + + - For other transformations generally the latter transformation overrules the earlier one, when they + overlap or conflict. + """ + ), + reference_documentation_url=f"{DEBPUTY_DOC_ROOT_DIR}/MANIFEST-FORMAT.md#transformations-packagespackagetransformations", + ), + ) + + api.plugable_manifest_rule( + OPARSER_PACKAGES, + "conffile-management", + ListOfDpkgMaintscriptHelperCommandFormat, + _unpack_list, + source_format=List[DpkgMaintscriptHelperCommand], + ) + + api.plugable_manifest_rule( + OPARSER_PACKAGES, + "clean-after-removal", + ListParsedFormat, + _parse_clean_after_removal, + source_format=List[Any], + # FIXME: debputy won't see the attributes for this one :'( + inline_reference_documentation=reference_documentation( + title="Remove runtime created paths on purge or post removal (`clean-after-removal`)", + description=textwrap.dedent( + """\ + For some packages, it is necessary to clean up some run-time created paths. Typical use cases are + deleting log files, cache files, or persistent state. This can be done via the `clean-after-removal`. 
+ An example being: + + packages: + foo: + clean-after-removal: + - /var/log/foo/*.log + - /var/log/foo/*.log.gz + - path: /var/log/foo/ + ignore-non-empty-dir: true + - /etc/non-conffile-configuration.conf + - path: /var/cache/foo + recursive: true + + The `clean-after-removal` key accepts a list, where each element is either a mapping, a string or a list + of strings. When an element is a mapping, then the following key/value pairs are applicable: + + * `path` or `paths` (required): A path match (`path`) or a list of path matches (`paths`) defining the + path(s) that should be removed after clean. The path match(es) can use globs and manifest variables. + Every path matched will by default be removed via `rm -f` or `rmdir` depending on whether the path + provided ends with a *literal* `/`. Special-rules for matches: + - Glob is interpreted by the shell, so shell (`/bin/sh`) rules apply to globs rather than + `debputy`'s glob rules. As an example, `foo/*` will **not** match `foo/.hidden-file`. + - `debputy` cannot evaluate whether these paths/globs will match the desired paths (or anything at + all). Be sure to test the resulting package. + - When a symlink is matched, it is not followed. + - Directory handling depends on the `recursive` attribute and whether the pattern ends with a literal + "/". + - `debputy` has restrictions on the globs being used to prevent rules that could cause massive damage + to the system. + + * `recursive` (optional): When `true`, the removal rule will use `rm -fr` rather than `rm -f` or `rmdir` + meaning any directory matched will be deleted along with all of its contents. + + * `ignore-non-empty-dir` (optional): When `true`, each path must be or match a directory (and as a + consequence each path must with a literal `/`). The affected directories will be deleted only if they + are empty. Non-empty directories will be skipped. This option is mutually exclusive with `recursive`. 
+ + * `delete-on` (optional, defaults to `purge`): This attribute defines when the removal happens. It can + be set to one of the following values: + - `purge`: The removal happens with the package is being purged. This is the default. At a technical + level, the removal occurs at `postrm purge`. + - `removal`: The removal happens immediately after the package has been removed. At a technical level, + the removal occurs at `postrm remove`. + + This feature resembles the concept of `rpm`'s `%ghost` files. + """ + ), + reference_documentation_url=f"{DEBPUTY_DOC_ROOT_DIR}/MANIFEST-FORMAT.md#remove-runtime-created-paths-on-purge-or-post-removal-clean-after-removal", + ), + ) + + api.plugable_manifest_rule( + OPARSER_PACKAGES, + "installation-search-dirs", + InstallationSearchDirsParsedFormat, + _parse_installation_search_dirs, + source_format=List[FileSystemExactMatchRule], + inline_reference_documentation=reference_documentation( + title="Custom installation time search directories (`installation-search-dirs`)", + description=textwrap.dedent( + """\ + For source packages that does multiple build, it can be an advantage to provide a custom list of + installation-time search directories. This can be done via the `installation-search-dirs` key. A common + example is building the source twice with different optimization and feature settings where the second + build is for the `debian-installer` (in the form of a `udeb` package). A sample manifest snippet could + look something like: + + installations: + - install: + # Because of the search order (see below), `foo` installs `debian/tmp/usr/bin/tool`, + # while `foo-udeb` installs `debian/tmp-udeb/usr/bin/tool` (assuming both paths are + # available). Note the rule can be split into two with the same effect if that aids + # readability or understanding. 
+ source: usr/bin/tool + into: + - foo + - foo-udeb + packages: + foo-udeb: + installation-search-dirs: + - debian/tmp-udeb + + + The `installation-search-dirs` key accepts a list, where each element is a path (str) relative from the + source root to the directory that should be used as a search directory (absolute paths are still interpreted + as relative to the source root). This list should contain all search directories that should be applicable + for this package (except the source root itself, which is always appended after the provided list). If the + key is omitted, then `debputy` will provide a default search order (In the `dh` integration, the default + is the directory `debian/tmp`). + + If a non-existing or non-directory path is listed, then it will be skipped (info-level note). If the path + exists and is a directory, it will also be checked for "not-installed" paths. + """ + ), + reference_documentation_url=f"{DEBPUTY_DOC_ROOT_DIR}/MANIFEST-FORMAT.md#custom-installation-time-search-directories-installation-search-dirs", + ), + ) + + +class BinaryVersionParsedFormat(DebputyParsedContent): + binary_version: str + + +class ListParsedFormat(DebputyParsedContent): + elements: List[Any] + + +class ListOfTransformationRulesFormat(DebputyParsedContent): + elements: List[TransformationRule] + + +class ListOfDpkgMaintscriptHelperCommandFormat(DebputyParsedContent): + elements: List[DpkgMaintscriptHelperCommand] + + +class InstallationSearchDirsParsedFormat(DebputyParsedContent): + installation_search_dirs: List[FileSystemExactMatchRule] + + +def _parse_binary_version( + _name: str, + parsed_data: BinaryVersionParsedFormat, + _attribute_path: AttributePath, + _parser_context: ParserContextData, +) -> str: + return parsed_data["binary_version"] + + +def _parse_installation_search_dirs( + _name: str, + parsed_data: InstallationSearchDirsParsedFormat, + _attribute_path: AttributePath, + _parser_context: ParserContextData, +) -> List[FileSystemExactMatchRule]: + 
return parsed_data["installation_search_dirs"] + + +def _unpack_list( + _name: str, + parsed_data: ListParsedFormat, + _attribute_path: AttributePath, + _parser_context: ParserContextData, +) -> List[Any]: + return parsed_data["elements"] + + +class CleanAfterRemovalRuleSourceFormat(TypedDict): + path: NotRequired[Annotated[str, DebputyParseHint.target_attribute("paths")]] + paths: NotRequired[List[str]] + delete_on: NotRequired[Literal["purge", "removal"]] + recursive: NotRequired[bool] + ignore_non_empty_dir: NotRequired[bool] + + +class CleanAfterRemovalRule(DebputyParsedContent): + paths: List[str] + delete_on: NotRequired[Literal["purge", "removal"]] + recursive: NotRequired[bool] + ignore_non_empty_dir: NotRequired[bool] + + +# FIXME: Not optimal that we are doing an initialization of ParserGenerator here. But the rule is not depending on any +# complex types that is regiersted by plugins, so it will work for now. +_CLEAN_AFTER_REMOVAL_RULE_PARSER = ParserGenerator().parser_from_typed_dict( + CleanAfterRemovalRule, + source_content=Union[CleanAfterRemovalRuleSourceFormat, str, List[str]], + inline_reference_documentation=reference_documentation( + reference_documentation_url=f"{DEBPUTY_DOC_ROOT_DIR}/MANIFEST-FORMAT.md#remove-runtime-created-paths-on-purge-or-post-removal-clean-after-removal", + ), +) + + +# Order between clean_on_removal and conffile_management is +# important. We want the dpkg conffile management rules to happen before the +# clean clean_on_removal rules. Since the latter only affects `postrm` +# and the order is reversed for `postrm` scripts (among other), we need do +# clean_on_removal first to account for the reversing of order. +# +# FIXME: All of this is currently not really possible todo, but it should be. 
+# (I think it is the correct order by "mistake" rather than by "design", which is +# what this note is about) +def _parse_clean_after_removal( + _name: str, + parsed_data: ListParsedFormat, + attribute_path: AttributePath, + parser_context: ParserContextData, +) -> None: # TODO: Return and pass to a maintscript helper + raw_clean_after_removal = parsed_data["elements"] + package_state = parser_context.current_binary_package_state + + for no, raw_transformation in enumerate(raw_clean_after_removal): + definition_source = attribute_path[no] + clean_after_removal_rules = _CLEAN_AFTER_REMOVAL_RULE_PARSER.parse_input( + raw_transformation, + definition_source, + parser_context=parser_context, + ) + patterns = clean_after_removal_rules["paths"] + if patterns: + definition_source.path_hint = patterns[0] + delete_on = clean_after_removal_rules.get("delete_on") or "purge" + recurse = clean_after_removal_rules.get("recursive") or False + ignore_non_empty_dir = ( + clean_after_removal_rules.get("ignore_non_empty_dir") or False + ) + if delete_on == "purge": + condition = '[ "$1" = "purge" ]' + else: + condition = '[ "$1" = "remove" ]' + + if ignore_non_empty_dir: + if recurse: + raise ManifestParseException( + 'The "recursive" and "ignore-non-empty-dir" options are mutually exclusive.' + f" Both were enabled at the same time in at {definition_source.path}" + ) + for pattern in patterns: + if not pattern.endswith("/"): + raise ManifestParseException( + 'When ignore-non-empty-dir is True, then all patterns must end with a literal "/"' + f' to ensure they only apply to directories. The pattern "{pattern}" at' + f" {definition_source.path} did not." 
+ ) + + substitution = parser_context.substitution + match_rules = [ + MatchRule.from_path_or_glob( + p, definition_source.path, substitution=substitution + ) + for p in patterns + ] + content_lines = [ + f"if {condition}; then\n", + ] + for idx, match_rule in enumerate(match_rules): + original_pattern = patterns[idx] + if match_rule is MATCH_ANYTHING: + raise ManifestParseException( + f'Using "{original_pattern}" in a clean rule would trash the system.' + f" Please restrict this pattern at {definition_source.path} considerably." + ) + is_subdir_match = False + matched_directory: Optional[str] + if isinstance(match_rule, ExactFileSystemPath): + matched_directory = ( + os.path.dirname(match_rule.path) + if match_rule.path not in ("/", ".", "./") + else match_rule.path + ) + is_subdir_match = True + else: + matched_directory = getattr(match_rule, "directory", None) + + if matched_directory is None: + raise ManifestParseException( + f'The pattern "{original_pattern}" defined at {definition_source.path} is not' + f" trivially anchored in a specific directory. Cowardly refusing to use it" + f" in a clean rule as it may trash the system if the pattern is overreaching." + f" Please avoid glob characters in the top level directories." + ) + assert matched_directory.startswith("./") or matched_directory in ( + ".", + "./", + "", + ) + acceptable_directory = False + would_have_allowed_direct_match = False + while matched_directory not in (".", "./", ""): + # Our acceptable paths set includes "/var/lib" or "/etc". We require that the + # pattern is either an exact match, in which case it may match directly inside + # the acceptable directory OR it is a pattern against a subdirectory of the + # acceptable path. As an example: + # + # /etc/inputrc <-- OK, exact match + # /etc/foo/* <-- OK, subdir match + # /etc/* <-- ERROR, glob directly in the accepted directory. 
+ if is_subdir_match and ( + matched_directory + in ACCEPTABLE_CLEAN_ON_REMOVAL_IF_EXACT_MATCH_OR_SUBDIR_OF + ): + acceptable_directory = True + break + if ( + matched_directory + in ACCEPTABLE_CLEAN_ON_REMOVAL_FOR_GLOBS_AND_EXACT_MATCHES + ): + # Special-case: In some directories (such as /var/log), we allow globs directly. + # Notably, X11's log files are /var/log/Xorg.*.log + acceptable_directory = True + break + if ( + matched_directory + in ACCEPTABLE_CLEAN_ON_REMOVAL_IF_EXACT_MATCH_OR_SUBDIR_OF + ): + would_have_allowed_direct_match = True + break + matched_directory = os.path.dirname(matched_directory) + is_subdir_match = True + + if would_have_allowed_direct_match and not acceptable_directory: + raise ManifestParseException( + f'The pattern "{original_pattern}" defined at {definition_source.path} seems to' + " be overreaching. If it has been a path (and not use a glob), the rule would" + " have been permitted." + ) + elif not acceptable_directory: + raise ManifestParseException( + f'The pattern or path "{original_pattern}" defined at {definition_source.path} seems to' + f' be overreaching or not limited to the set of "known acceptable" directories.' + ) + + try: + shell_escaped_pattern = match_rule.shell_escape_pattern() + except TypeError: + raise ManifestParseException( + f'Sorry, the pattern "{original_pattern}" defined at {definition_source.path}' + f" is unfortunately not supported by `debputy` for clean-after-removal rules." + f" If you can rewrite the rule to something like `/var/log/foo/*.log` or" + f' similar "trivial" patterns. You may have to rewrite the pattern the rule ' + f" into multiple patterns to achieve this. This restriction is to enable " + f' `debputy` to ensure the pattern is correctly executed plus catch "obvious' + f' system trashing" patterns. Apologies for the inconvenience.' 
+ ) + + if ignore_non_empty_dir: + cmd = f' rmdir --ignore-fail-on-non-empty "${{DPKG_ROOT}}"{shell_escaped_pattern}\n' + elif recurse: + cmd = f' rm -fr "${{DPKG_ROOT}}"{shell_escaped_pattern}\n' + elif original_pattern.endswith("/"): + cmd = f' rmdir "${{DPKG_ROOT}}"{shell_escaped_pattern}\n' + else: + cmd = f' rm -f "${{DPKG_ROOT}}"{shell_escaped_pattern}\n' + content_lines.append(cmd) + content_lines.append("fi\n") + + snippet = MaintscriptSnippet(definition_source.path, "".join(content_lines)) + package_state.maintscript_snippets["postrm"].append(snippet) diff --git a/src/debputy/plugin/debputy/debputy_plugin.py b/src/debputy/plugin/debputy/debputy_plugin.py new file mode 100644 index 0000000..7a8f6da --- /dev/null +++ b/src/debputy/plugin/debputy/debputy_plugin.py @@ -0,0 +1,400 @@ +import textwrap + +from debputy.plugin.api import ( + DebputyPluginInitializer, + packager_provided_file_reference_documentation, +) +from debputy.plugin.debputy.metadata_detectors import ( + detect_systemd_tmpfiles, + detect_kernel_modules, + detect_icons, + detect_gsettings_dependencies, + detect_xfonts, + detect_initramfs_hooks, + detect_systemd_sysusers, + detect_pycompile_files, + translate_capabilities, + pam_auth_update, + auto_depends_arch_any_solink, +) +from debputy.plugin.debputy.paths import ( + SYSTEMD_TMPFILES_DIR, + INITRAMFS_HOOK_DIR, + GSETTINGS_SCHEMA_DIR, + SYSTEMD_SYSUSERS_DIR, +) +from debputy.plugin.debputy.private_api import initialize_via_private_api + + +def initialize_debputy_features(api: DebputyPluginInitializer) -> None: + initialize_via_private_api(api) + declare_manifest_variables(api) + register_packager_provided_files(api) + register_package_metadata_detectors(api) + + +def declare_manifest_variables(api: DebputyPluginInitializer) -> None: + api.manifest_variable( + "path:BASH_COMPLETION_DIR", + "/usr/share/bash-completion/completions", + variable_reference_documentation="Directory to install bash completions into", + ) + api.manifest_variable( + 
"path:GNU_INFO_DIR", + "/usr/share/info", + variable_reference_documentation="Directory to install GNU INFO files into", + ) + + api.manifest_variable( + "token:NL", + "\n", + variable_reference_documentation="Literal newline (linefeed) character", + ) + api.manifest_variable( + "token:NEWLINE", + "\n", + variable_reference_documentation="Literal newline (linefeed) character", + ) + api.manifest_variable( + "token:TAB", + "\t", + variable_reference_documentation="Literal tab character", + ) + api.manifest_variable( + "token:OPEN_CURLY_BRACE", + "{", + variable_reference_documentation='Literal "{" character', + ) + api.manifest_variable( + "token:CLOSE_CURLY_BRACE", + "}", + variable_reference_documentation='Literal "}" character', + ) + api.manifest_variable( + "token:DOUBLE_OPEN_CURLY_BRACE", + "{{", + variable_reference_documentation='Literal "{{" character - useful to avoid triggering a substitution', + ) + api.manifest_variable( + "token:DOUBLE_CLOSE_CURLY_BRACE", + "}}", + variable_reference_documentation='Literal "}}" string - useful to avoid triggering a substitution', + ) + + +def register_package_metadata_detectors(api: DebputyPluginInitializer) -> None: + api.metadata_or_maintscript_detector("systemd-tmpfiles", detect_systemd_tmpfiles) + api.metadata_or_maintscript_detector("systemd-sysusers", detect_systemd_sysusers) + api.metadata_or_maintscript_detector("kernel-modules", detect_kernel_modules) + api.metadata_or_maintscript_detector("icon-cache", detect_icons) + api.metadata_or_maintscript_detector( + "gsettings-dependencies", + detect_gsettings_dependencies, + ) + api.metadata_or_maintscript_detector("xfonts", detect_xfonts) + api.metadata_or_maintscript_detector("initramfs-hooks", detect_initramfs_hooks) + api.metadata_or_maintscript_detector("pycompile-files", detect_pycompile_files) + api.metadata_or_maintscript_detector( + "translate-capabilities", + translate_capabilities, + ) + api.metadata_or_maintscript_detector("pam-auth-update", 
pam_auth_update) + api.metadata_or_maintscript_detector( + "auto-depends-arch-any-solink", + auto_depends_arch_any_solink, + ) + + +def register_packager_provided_files(api: DebputyPluginInitializer) -> None: + api.packager_provided_file( + "tmpfiles", + f"{SYSTEMD_TMPFILES_DIR}/{{name}}.conf", + reference_documentation=packager_provided_file_reference_documentation( + format_documentation_uris=["man:tmpfiles.d(5)"] + ), + ) + api.packager_provided_file( + "sysusers", + f"{SYSTEMD_SYSUSERS_DIR}/{{name}}.conf", + reference_documentation=packager_provided_file_reference_documentation( + format_documentation_uris=["man:sysusers.d(5)"] + ), + ) + api.packager_provided_file( + "bash-completion", "/usr/share/bash-completion/completions/{name}" + ) + api.packager_provided_file( + "bug-script", + "./usr/share/bug/{name}/script", + default_mode=0o0755, + allow_name_segment=False, + ) + api.packager_provided_file( + "bug-control", + "/usr/share/bug/{name}/control", + allow_name_segment=False, + ) + + api.packager_provided_file( + "bug-presubj", + "/usr/share/bug/{name}/presubj", + allow_name_segment=False, + ) + + api.packager_provided_file("pam", "/usr/lib/pam.d/{name}") + api.packager_provided_file( + "ppp.ip-up", + "/etc/ppp/ip-up.d/{name}", + default_mode=0o0755, + ) + api.packager_provided_file( + "ppp.ip-down", + "/etc/ppp/ip-down.d/{name}", + default_mode=0o0755, + ) + api.packager_provided_file( + "lintian-overrides", + "/usr/share/lintian/overrides/{name}", + allow_name_segment=False, + ) + api.packager_provided_file("logrotate", "/etc/logrotate.d/{name}") + api.packager_provided_file( + "logcheck.cracking", + "/etc/logcheck/cracking.d/{name}", + post_formatting_rewrite=_replace_dot_with_underscore, + ) + api.packager_provided_file( + "logcheck.violations", + "/etc/logcheck/violations.d/{name}", + post_formatting_rewrite=_replace_dot_with_underscore, + ) + api.packager_provided_file( + "logcheck.violations.ignore", + "/etc/logcheck/violations.ignore.d/{name}", + 
post_formatting_rewrite=_replace_dot_with_underscore, + ) + api.packager_provided_file( + "logcheck.ignore.workstation", + "/etc/logcheck/ignore.d.workstation/{name}", + post_formatting_rewrite=_replace_dot_with_underscore, + ) + api.packager_provided_file( + "logcheck.ignore.server", + "/etc/logcheck/ignore.d.server/{name}", + post_formatting_rewrite=_replace_dot_with_underscore, + ) + api.packager_provided_file( + "logcheck.ignore.paranoid", + "/etc/logcheck/ignore.d.paranoid/{name}", + post_formatting_rewrite=_replace_dot_with_underscore, + ) + + api.packager_provided_file("mime", "/usr/lib/mime/packages/{name}") + api.packager_provided_file("sharedmimeinfo", "/usr/share/mime/packages/{name}.xml") + + api.packager_provided_file( + "if-pre-up", + "/etc/network/if-pre-up.d/{name}", + default_mode=0o0755, + ) + api.packager_provided_file( + "if-up", + "/etc/network/if-up.d/{name}", + default_mode=0o0755, + ) + api.packager_provided_file( + "if-down", + "/etc/network/if-down.d/{name}", + default_mode=0o0755, + ) + api.packager_provided_file( + "if-post-down", + "/etc/network/if-post-down.d/{name}", + default_mode=0o0755, + ) + + api.packager_provided_file( + "cron.hourly", + "/etc/cron.hourly/{name}", + default_mode=0o0755, + ) + api.packager_provided_file( + "cron.daily", + "/etc/cron.daily/{name}", + default_mode=0o0755, + ) + api.packager_provided_file( + "cron.weekly", + "/etc/cron.weekly/{name}", + default_mode=0o0755, + ) + api.packager_provided_file( + "cron.monthly", + "./etc/cron.monthly/{name}", + default_mode=0o0755, + ) + api.packager_provided_file( + "cron.yearly", + "/etc/cron.yearly/{name}", + default_mode=0o0755, + ) + # cron.d uses 0644 unlike the others + api.packager_provided_file( + "cron.d", + "/etc/cron.d/{name}", + reference_documentation=packager_provided_file_reference_documentation( + format_documentation_uris=["man:crontab(5)"] + ), + ) + + api.packager_provided_file( + "initramfs-hook", f"{INITRAMFS_HOOK_DIR}/{{name}}", 
default_mode=0o0755 + ) + + api.packager_provided_file("modprobe", "/etc/modprobe.d/{name}.conf") + + api.packager_provided_file( + "init", + "/etc/init.d/{name}", + default_mode=0o755, + ) + api.packager_provided_file("default", "/etc/default/{name}") + + for stem in [ + "mount", + "path", + "service", + "socket", + "target", + "timer", + ]: + api.packager_provided_file( + stem, + f"/usr/lib/systemd/system/{{name}}.{stem}", + reference_documentation=packager_provided_file_reference_documentation( + format_documentation_uris=[f"man:systemd.{stem}(5)"] + ), + ) + + for stem in [ + "path", + "service", + "socket", + "target", + "timer", + ]: + api.packager_provided_file( + f"@{stem}", f"/usr/lib/systemd/system/{{name}}@.{stem}" + ) + + # api.packager_provided_file( + # "udev", + # "./lib/udev/rules.d/{priority:02}-{name}.rules", + # default_priority=60, + # ) + + api.packager_provided_file( + "gsettings-override", + f"{GSETTINGS_SCHEMA_DIR}/{{priority:02}}_{{name}}.gschema.override", + default_priority=10, + ) + + # Special-cases that will probably not be a good example for other plugins + api.packager_provided_file( + "changelog", + # The "changelog.Debian" gets renamed to "changelog" for native packages elsewhere. + # Also, the changelog trimming is also done elsewhere. + "/usr/share/doc/{name}/changelog.Debian", + allow_name_segment=False, + packageless_is_fallback_for_all_packages=True, + reference_documentation=packager_provided_file_reference_documentation( + description=textwrap.dedent( + """\ + This file is the changelog of the package and is mandatory. + + The changelog contains the version of the source package and is mandatory for all + packages. + + Use `dch --create` to create the changelog. + + In theory, the binary package can have a different changelog than the source + package (by having `debian/binary-package.changelog`). However, it is generally + not useful and leads to double administration. It has not been used in practice. 
+ """ + ), + format_documentation_uris=[ + "man:deb-changelog(5)", + "https://www.debian.org/doc/debian-policy/ch-source.html#debian-changelog-debian-changelog", + "man:dch(1)", + ], + ), + ) + api.packager_provided_file( + "copyright", + "/usr/share/doc/{name}/copyright", + allow_name_segment=False, + packageless_is_fallback_for_all_packages=True, + reference_documentation=packager_provided_file_reference_documentation( + description=textwrap.dedent( + """\ + This file documents the license and copyright information of the binary package. + Packages aimed at the Debian archive (and must derivatives thereof) must have this file. + + For packages not aimed at Debian, the file can still be useful to convey the license + terms of the package (which is often a requirement in many licenses). However, it is + not a strict *technical* requirement. Whether it is a legal requirement depends on + license. + + Often, the same file can be used for all packages. In the extremely rare case where + one binary package has a "vastly different" license than the other packages, you can + provide a package specific version for that package. + """ + ), + format_documentation_uris=[ + "https://www.debian.org/doc/debian-policy/ch-source.html#copyright-debian-copyright", + "https://www.debian.org/doc/debian-policy/ch-docs.html#s-copyrightfile", + "https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/", + ], + ), + ) + api.packager_provided_file( + "NEWS", + "/usr/share/doc/{name}/NEWS.Debian", + allow_name_segment=False, + packageless_is_fallback_for_all_packages=True, + reference_documentation=packager_provided_file_reference_documentation( + description=textwrap.dedent( + """\ + Important news that should be shown to the user/admin when upgrading. If a system has + apt-listchanges installed, then contents of this file will be shown prior to upgrading + the package. + + Uses a similar format to that of debian/changelog (create with `dch --news --create`). 
+ """ + ), + format_documentation_uris=[ + "https://www.debian.org/doc/manuals/developers-reference/best-pkging-practices.en.html#supplementing-changelogs-with-news-debian-files", + "man:dch(1)", + ], + ), + ) + api.packager_provided_file( + "README.Debian", + "/usr/share/doc/{name}/README.Debian", + allow_name_segment=False, + ) + api.packager_provided_file( + "TODO", + "/usr/share/doc/{name}/TODO.Debian", + allow_name_segment=False, + ) + # From dh-python / dh_python3 + # api.packager_provided_file( + # "bcep", + # "/usr/share/python3/bcep/{name}", + # allow_name_segment=False, + # ) + + +def _replace_dot_with_underscore(x: str) -> str: + return x.replace(".", "_") diff --git a/src/debputy/plugin/debputy/discard_rules.py b/src/debputy/plugin/debputy/discard_rules.py new file mode 100644 index 0000000..689761e --- /dev/null +++ b/src/debputy/plugin/debputy/discard_rules.py @@ -0,0 +1,97 @@ +import re + +from debputy.plugin.api import VirtualPath + +_VCS_PATHS = { + ".arch-inventory", + ".arch-ids", + ".be", + ".bzrbackup", + ".bzrignore", + ".bzrtags", + ".cvsignore", + ".hg", + ".hgignore", + ".hgtags", + ".hgsigs", + ".git", + ".gitignore", + ".gitattributes", + ".gitmodules", + ".gitreview", + ".mailmap", + ".mtn-ignore", + ".svn", + "{arch}", + "CVS", + "RCS", + "_MTN", + "_darcs", +} + +_BACKUP_FILES_RE = re.compile( + "|".join( + [ + # Common backup files + r".*~", + r".*[.](?:bak|orig|rej)", + # Editor backup/swap files + r"[.]#.*", + r"[.].*[.]sw.", + # Other known stuff + r"[.]shelf", + r",,.*", # "baz-style junk" (according to dpkg (Dpkg::Source::Package) + r"DEADJOE", # Joe's one line of immortality that just gets cargo cult'ed around ... just in case. 
+ ] + ) +) + +_DOXYGEN_DIR_TEST_FILES = ["doxygen.css", "doxygen.svg", "index.html"] + + +def _debputy_discard_pyc_files(path: "VirtualPath") -> bool: + if path.name == "__pycache__" and path.is_dir: + return True + return path.name.endswith((".pyc", ".pyo")) and path.is_file + + +def _debputy_prune_la_files(path: "VirtualPath") -> bool: + return ( + path.name.endswith(".la") + and path.is_file + and path.absolute.startswith("/usr/lib") + ) + + +def _debputy_prune_backup_files(path: VirtualPath) -> bool: + return bool(_BACKUP_FILES_RE.match(path.name)) + + +def _debputy_prune_vcs_paths(path: VirtualPath) -> bool: + return path.name in _VCS_PATHS + + +def _debputy_prune_info_dir_file(path: VirtualPath) -> bool: + return path.absolute == "/usr/share/info/dir" + + +def _debputy_prune_binary_debian_dir(path: VirtualPath) -> bool: + return path.absolute == "/DEBIAN" + + +def _debputy_prune_doxygen_cruft(path: VirtualPath) -> bool: + if not path.name.endswith((".md5", ".map")) or not path.is_file: + return False + parent_dir = path.parent_dir + while parent_dir: + is_doxygen_dir = True + for name in _DOXYGEN_DIR_TEST_FILES: + test_file = parent_dir.get(name) + if test_file is None or not test_file.is_file: + is_doxygen_dir = False + break + + if is_doxygen_dir: + return True + parent_dir = parent_dir.parent_dir + return False diff --git a/src/debputy/plugin/debputy/manifest_root_rules.py b/src/debputy/plugin/debputy/manifest_root_rules.py new file mode 100644 index 0000000..cc2b1d4 --- /dev/null +++ b/src/debputy/plugin/debputy/manifest_root_rules.py @@ -0,0 +1,254 @@ +import textwrap +from typing import List, Any, Dict, Tuple, TYPE_CHECKING, cast + +from debputy._manifest_constants import ( + ManifestVersion, + MK_MANIFEST_VERSION, + MK_INSTALLATIONS, + SUPPORTED_MANIFEST_VERSIONS, + MK_MANIFEST_DEFINITIONS, + MK_PACKAGES, + MK_MANIFEST_VARIABLES, +) +from debputy.exceptions import DebputySubstitutionError +from debputy.installations import InstallRule +from 
debputy.manifest_parser.base_types import DebputyParsedContent +from debputy.manifest_parser.exceptions import ManifestParseException +from debputy.manifest_parser.parser_data import ParserContextData +from debputy.manifest_parser.util import AttributePath +from debputy.plugin.api import reference_documentation +from debputy.plugin.api.impl import DebputyPluginInitializerProvider +from debputy.plugin.api.impl_types import ( + OPARSER_MANIFEST_ROOT, + OPARSER_MANIFEST_DEFINITIONS, + SUPPORTED_DISPATCHABLE_OBJECT_PARSERS, + OPARSER_PACKAGES, +) +from debputy.substitution import VariableNameState, SUBST_VAR_RE + +if TYPE_CHECKING: + from debputy.highlevel_manifest_parser import YAMLManifestParser + + +def register_manifest_root_rules(api: DebputyPluginInitializerProvider) -> None: + # Registration order matters. Notably, definitions must come before anything that can + # use definitions (variables), which is why it is second only to the manifest version. + api.plugable_manifest_rule( + OPARSER_MANIFEST_ROOT, + MK_MANIFEST_VERSION, + ManifestVersionFormat, + _handle_version, + source_format=ManifestVersion, + inline_reference_documentation=reference_documentation( + title="Manifest version", + description=textwrap.dedent( + """\ + All `debputy` manifests must include a `debputy` manifest version, which will enable the + format to change over time. For now, there is only one version (`"0.1"`) and you have + to include the line: + + manifest-version: "0.1" + + On its own, the manifest containing only `manifest-version: "..."` will not do anything. So if you + end up only having the `manifest-version` key in the manifest, you can just remove the manifest and + rely entirely on the built-in rules. 
+ """ + ), + ), + ) + api.plugable_object_parser( + OPARSER_MANIFEST_ROOT, + MK_MANIFEST_DEFINITIONS, + object_parser_key=OPARSER_MANIFEST_DEFINITIONS, + on_end_parse_step=lambda _a, _b, _c, mp: mp._ensure_package_states_is_initialized(), + ) + api.plugable_manifest_rule( + OPARSER_MANIFEST_DEFINITIONS, + MK_MANIFEST_VARIABLES, + ManifestVariablesParsedFormat, + _handle_manifest_variables, + source_format=Dict[str, str], + inline_reference_documentation=reference_documentation( + title="Manifest Variables (`variables`)", + description=textwrap.dedent( + """\ + It is possible to provide custom manifest variables via the `variables` attribute. An example: + + manifest-version: '0.1' + definitions: + variables: + LIBPATH: "/usr/lib/{{DEB_HOST_MULTIARCH}}" + SONAME: "1" + installations: + - install: + source: build/libfoo.so.{{SONAME}}* + # The quotes here is for the YAML parser's sake. + dest-dir: "{{LIBPATH}}" + into: libfoo{{SONAME}} + + The value of the `variables` key must be a mapping, where each key is a new variable name and + the related value is the value of said key. The keys must be valid variable name and not shadow + existing variables (that is, variables such as `PACKAGE` and `DEB_HOST_MULTIARCH` *cannot* be + redefined). The value for each variable *can* refer to *existing* variables as seen in the + example above. + + As usual, `debputy` will insist that all declared variables must be used. + + Limitations: + * When declaring variables that depends on another variable declared in the manifest, the + order is important. The variables are resolved from top to bottom. + * When a manifest variable depends on another manifest variable, the existing variable is + currently always resolved in source context. As a consequence, some variables such as + `{{PACKAGE}}` cannot be used when defining a variable. This restriction may be + lifted in the future. 
+ """ + ), + ), + ) + api.plugable_manifest_rule( + OPARSER_MANIFEST_ROOT, + MK_INSTALLATIONS, + ListOfInstallRulesFormat, + _handle_installation_rules, + source_format=List[InstallRule], + inline_reference_documentation=reference_documentation( + title="Installations", + description=textwrap.dedent( + """\ + For source packages building a single binary, the `dh_auto_install` from debhelper will default to + providing everything from upstream's install in the binary package. The `debputy` tool matches this + behaviour and accordingly, the `installations` feature is only relevant in this case when you need to + manually specify something upstream's install did not cover. + + For sources, that build multiple binaries, where `dh_auto_install` does not detect anything to install, + or when `dh_auto_install --destdir debian/tmp` is used, the `installations` section of the manifest is + used to declare what goes into which binary package. An example: + + installations: + - install: + sources: "usr/bin/foo" + into: foo + - install: + sources: "usr/*" + into: foo-extra + + All installation rules are processed in order (top to bottom). Once a path has been matched, it can + no longer be matched by future rules. In the above example, then `usr/bin/foo` would be in the `foo` + package while everything in `usr` *except* `usr/bin/foo` would be in `foo-extra`. If these had been + ordered in reverse, the `usr/bin/foo` rule would not have matched anything and caused `debputy` + to reject the input as an error on that basis. This behaviour is similar to "DEP-5" copyright files, + except the order is reversed ("DEP-5" uses "last match wins", where here we are doing "first match wins") + + In the rare case that some path need to be installed into two packages at the same time, then this is + generally done by changing `into` into a list of packages. + + All installations are currently run in *source* package context. 
This implies that: + + 1) No package specific substitutions are available. Notably `{{PACKAGE}}` cannot be resolved. + 2) All conditions are evaluated in source context. For 99.9% of users, this makes no difference, + but there is a cross-build feature that changes the "per package" architecture which is affected. + + This is a limitation that should be fixed in `debputy`. + + **Attention debhelper users**: Note the difference between `dh_install` (etc.) vs. `debputy` on + overlapping matches for installation. + """ + ), + ), + ) + api.plugable_manifest_rule( + OPARSER_MANIFEST_ROOT, + MK_PACKAGES, + DictFormat, + _handle_opaque_dict, + source_format=Dict[str, Any], + inline_reference_documentation=SUPPORTED_DISPATCHABLE_OBJECT_PARSERS[ + OPARSER_PACKAGES + ], + ) + + +class ManifestVersionFormat(DebputyParsedContent): + manifest_version: ManifestVersion + + +class ListOfInstallRulesFormat(DebputyParsedContent): + elements: List[InstallRule] + + +class DictFormat(DebputyParsedContent): + mapping: Dict[str, Any] + + +class ManifestVariablesParsedFormat(DebputyParsedContent): + variables: Dict[str, str] + + +def _handle_version( + _name: str, + parsed_data: ManifestVersionFormat, + _attribute_path: AttributePath, + _parser_context: ParserContextData, +) -> str: + manifest_version = parsed_data["manifest_version"] + if manifest_version not in SUPPORTED_MANIFEST_VERSIONS: + raise ManifestParseException( + "Unsupported manifest-version. 
This implementation supports the following versions:" + f' {", ".join(repr(v) for v in SUPPORTED_MANIFEST_VERSIONS)}"' + ) + return manifest_version + + +def _handle_manifest_variables( + _name: str, + parsed_data: ManifestVariablesParsedFormat, + variables_path: AttributePath, + parser_context: ParserContextData, +) -> None: + variables = parsed_data.get("variables", {}) + resolved_vars: Dict[str, Tuple[str, AttributePath]] = {} + manifest_parser: "YAMLManifestParser" = cast("YAMLManifestParser", parser_context) + substitution = manifest_parser.substitution + for key, value_raw in variables.items(): + key_path = variables_path[key] + if not SUBST_VAR_RE.match("{{" + key + "}}"): + raise ManifestParseException( + f"The variable at {key_path.path} has an invalid name and therefore cannot" + " be used." + ) + if substitution.variable_state(key) != VariableNameState.UNDEFINED: + raise ManifestParseException( + f'The variable "{key}" is already reserved/defined. Error triggered by' + f" {key_path.path}." 
+ ) + try: + value = substitution.substitute(value_raw, key_path.path) + except DebputySubstitutionError: + if not resolved_vars: + raise + # See if flushing the variables work + substitution = manifest_parser.add_extra_substitution_variables( + **resolved_vars + ) + resolved_vars = {} + value = substitution.substitute(value_raw, key_path.path) + resolved_vars[key] = (value, key_path) + substitution = manifest_parser.add_extra_substitution_variables(**resolved_vars) + + +def _handle_installation_rules( + _name: str, + parsed_data: ListOfInstallRulesFormat, + _attribute_path: AttributePath, + _parser_context: ParserContextData, +) -> List[Any]: + return parsed_data["elements"] + + +def _handle_opaque_dict( + _name: str, + parsed_data: DictFormat, + _attribute_path: AttributePath, + _parser_context: ParserContextData, +) -> Dict[str, Any]: + return parsed_data["mapping"] diff --git a/src/debputy/plugin/debputy/metadata_detectors.py b/src/debputy/plugin/debputy/metadata_detectors.py new file mode 100644 index 0000000..4338087 --- /dev/null +++ b/src/debputy/plugin/debputy/metadata_detectors.py @@ -0,0 +1,550 @@ +import itertools +import os +import re +import textwrap +from typing import Iterable, Iterator + +from debputy.plugin.api import ( + VirtualPath, + BinaryCtrlAccessor, + PackageProcessingContext, +) +from debputy.plugin.debputy.paths import ( + INITRAMFS_HOOK_DIR, + SYSTEMD_TMPFILES_DIR, + GSETTINGS_SCHEMA_DIR, + SYSTEMD_SYSUSERS_DIR, +) +from debputy.plugin.debputy.types import DebputyCapability +from debputy.util import assume_not_none, _warn + +DPKG_ROOT = '"${DPKG_ROOT}"' +DPKG_ROOT_UNQUOTED = "${DPKG_ROOT}" + +KERNEL_MODULE_EXTENSIONS = tuple( + f"{ext}{comp_ext}" + for ext, comp_ext in itertools.product( + (".o", ".ko"), + ("", ".gz", ".bz2", ".xz"), + ) +) + + +def detect_initramfs_hooks( + fs_root: VirtualPath, + ctrl: BinaryCtrlAccessor, + _unused: PackageProcessingContext, +) -> None: + hook_dir = fs_root.lookup(INITRAMFS_HOOK_DIR) + if not hook_dir: 
+ return + for _ in hook_dir.iterdir: + # Only add the trigger if the directory is non-empty. It is unlikely to matter a lot, + # but we do this to match debhelper. + break + else: + return + + ctrl.dpkg_trigger("activate-noawait", "update-initramfs") + + +def _all_tmpfiles_conf(fs_root: VirtualPath) -> Iterable[VirtualPath]: + seen_tmpfiles = set() + tmpfiles_dirs = [ + SYSTEMD_TMPFILES_DIR, + "./etc/tmpfiles.d", + ] + for tmpfiles_dir_path in tmpfiles_dirs: + tmpfiles_dir = fs_root.lookup(tmpfiles_dir_path) + if not tmpfiles_dir: + continue + for path in tmpfiles_dir.iterdir: + if ( + not path.is_file + or not path.name.endswith(".conf") + or path.name in seen_tmpfiles + ): + continue + seen_tmpfiles.add(path.name) + yield path + + +def detect_systemd_tmpfiles( + fs_root: VirtualPath, + ctrl: BinaryCtrlAccessor, + _unused: PackageProcessingContext, +) -> None: + tmpfiles_confs = [ + x.name for x in sorted(_all_tmpfiles_conf(fs_root), key=lambda x: x.name) + ] + if not tmpfiles_confs: + return + + tmpfiles_escaped = ctrl.maintscript.escape_shell_words(*tmpfiles_confs) + + snippet = textwrap.dedent( + f"""\ + if [ -x "$(command -v systemd-tmpfiles)" ]; then + systemd-tmpfiles ${{DPKG_ROOT:+--root="$DPKG_ROOT"}} --create {tmpfiles_escaped} || true + fi + """ + ) + + ctrl.maintscript.on_configure(snippet) + + +def _all_sysusers_conf(fs_root: VirtualPath) -> Iterable[VirtualPath]: + sysusers_dir = fs_root.lookup(SYSTEMD_SYSUSERS_DIR) + if not sysusers_dir: + return + for child in sysusers_dir.iterdir: + if not child.name.endswith(".conf"): + continue + yield child + + +def detect_systemd_sysusers( + fs_root: VirtualPath, + ctrl: BinaryCtrlAccessor, + _unused: PackageProcessingContext, +) -> None: + sysusers_confs = [p.name for p in _all_sysusers_conf(fs_root)] + if not sysusers_confs: + return + + sysusers_escaped = ctrl.maintscript.escape_shell_words(*sysusers_confs) + + snippet = textwrap.dedent( + f"""\ + systemd-sysusers ${{DPKG_ROOT:+--root="$DPKG_ROOT"}} 
--create {sysusers_escaped} || true + """ + ) + + ctrl.substvars.add_dependency( + "misc:Depends", "systemd | systemd-standalone-sysusers | systemd-sysusers" + ) + ctrl.maintscript.on_configure(snippet) + + +def detect_icons( + fs_root: VirtualPath, + ctrl: BinaryCtrlAccessor, + _unused: PackageProcessingContext, +) -> None: + icons_root_dir = fs_root.lookup("./usr/share/icons") + if not icons_root_dir: + return + icon_dirs = [] + for subdir in icons_root_dir.iterdir: + if subdir.name in ("gnome", "hicolor"): + # dh_icons skips this for some reason. + continue + for p in subdir.all_paths(): + if p.is_file and p.name.endswith((".png", ".svg", ".xpm", ".icon")): + icon_dirs.append(subdir.absolute) + break + if not icon_dirs: + return + + icon_dir_list_escaped = ctrl.maintscript.escape_shell_words(*icon_dirs) + + postinst_snippet = textwrap.dedent( + f"""\ + if command -v update-icon-caches >/dev/null; then + update-icon-caches {icon_dir_list_escaped} + fi + """ + ) + + postrm_snippet = textwrap.dedent( + f"""\ + if command -v update-icon-caches >/dev/null; then + update-icon-caches {icon_dir_list_escaped} + fi + """ + ) + + ctrl.maintscript.on_configure(postinst_snippet) + ctrl.maintscript.unconditionally_in_script("postrm", postrm_snippet) + + +def detect_gsettings_dependencies( + fs_root: VirtualPath, + ctrl: BinaryCtrlAccessor, + _unused: PackageProcessingContext, +) -> None: + gsettings_schema_dir = fs_root.lookup(GSETTINGS_SCHEMA_DIR) + if not gsettings_schema_dir: + return + + for path in gsettings_schema_dir.all_paths(): + if path.is_file and path.name.endswith((".xml", ".override")): + ctrl.substvars.add_dependency( + "misc:Depends", "dconf-gsettings-backend | gsettings-backend" + ) + break + + +def detect_kernel_modules( + fs_root: VirtualPath, + ctrl: BinaryCtrlAccessor, + _unused: PackageProcessingContext, +) -> None: + for prefix in [".", "./usr"]: + module_root_dir = fs_root.lookup(f"{prefix}/lib/modules") + + if not module_root_dir: + continue + + 
module_version_dirs = [] + + for module_version_dir in module_root_dir.iterdir: + if not module_version_dir.is_dir: + continue + + for fs_path in module_version_dir.all_paths(): + if fs_path.name.endswith(KERNEL_MODULE_EXTENSIONS): + module_version_dirs.append(module_version_dir.name) + break + + for module_version in module_version_dirs: + module_version_escaped = ctrl.maintscript.escape_shell_words(module_version) + postinst_snippet = textwrap.dedent( + f"""\ + if [ -e /boot/System.map-{module_version_escaped} ]; then + depmod -a -F /boot/System.map-{module_version_escaped} {module_version_escaped} || true + fi + """ + ) + + postrm_snippet = textwrap.dedent( + f"""\ + if [ -e /boot/System.map-{module_version_escaped} ]; then + depmod -a -F /boot/System.map-{module_version_escaped} {module_version_escaped} || true + fi + """ + ) + + ctrl.maintscript.on_configure(postinst_snippet) + # TODO: This should probably be on removal. However, this is what debhelper did and we should + # do the same until we are sure (not that it matters a lot). 
+ ctrl.maintscript.unconditionally_in_script("postrm", postrm_snippet) + + +def detect_xfonts( + fs_root: VirtualPath, + ctrl: BinaryCtrlAccessor, + context: PackageProcessingContext, +) -> None: + xfonts_root_dir = fs_root.lookup("./usr/share/fonts/X11/") + if not xfonts_root_dir: + return + + cmds = [] + cmds_postinst = [] + cmds_postrm = [] + escape_shell_words = ctrl.maintscript.escape_shell_words + package_name = context.binary_package.name + + for xfonts_dir in xfonts_root_dir.iterdir: + xfonts_dirname = xfonts_dir.name + if not xfonts_dir.is_dir or xfonts_dirname.startswith("."): + continue + if fs_root.lookup(f"./etc/X11/xfonts/{xfonts_dirname}/{package_name}.scale"): + cmds.append(escape_shell_words("update-fonts-scale", xfonts_dirname)) + cmds.append( + escape_shell_words("update-fonts-dir", "--x11r7-layout", xfonts_dirname) + ) + alias_file = fs_root.lookup( + f"./etc/X11/xfonts/{xfonts_dirname}/{package_name}.alias" + ) + if alias_file: + cmds_postinst.append( + escape_shell_words( + "update-fonts-alias", + "--include", + alias_file.absolute, + xfonts_dirname, + ) + ) + cmds_postrm.append( + escape_shell_words( + "update-fonts-alias", + "--exclude", + alias_file.absolute, + xfonts_dirname, + ) + ) + + if not cmds: + return + + postinst_snippet = textwrap.dedent( + f"""\ + if command -v update-fonts-dir >/dev/null; then + {';'.join(itertools.chain(cmds, cmds_postinst))} + fi + """ + ) + + postrm_snippet = textwrap.dedent( + f"""\ + if [ -x "`command -v update-fonts-dir`" ]; then + {';'.join(itertools.chain(cmds, cmds_postrm))} + fi + """ + ) + + ctrl.maintscript.unconditionally_in_script("postinst", postinst_snippet) + ctrl.maintscript.unconditionally_in_script("postrm", postrm_snippet) + ctrl.substvars.add_dependency("misc:Depends", "xfonts-utils") + + +# debputy does not support python2, so we do not list python / python2. 
+_PYTHON_PUBLIC_DIST_DIR_NAMES = re.compile(r"(?:pypy|python)3(?:[.]\d+)?") + + +def _public_python_dist_dirs(fs_root: VirtualPath) -> Iterator[VirtualPath]: + usr_lib = fs_root.lookup("./usr/lib") + root_dirs = [] + if usr_lib: + root_dirs.append(usr_lib) + + dbg_root = fs_root.lookup("./usr/lib/debug/usr/lib") + if dbg_root: + root_dirs.append(dbg_root) + + for root_dir in root_dirs: + python_dirs = ( + path + for path in root_dir.iterdir + if path.is_dir and _PYTHON_PUBLIC_DIST_DIR_NAMES.match(path.name) + ) + for python_dir in python_dirs: + dist_packages = python_dir.get("dist-packages") + if not dist_packages: + continue + yield dist_packages + + +def _has_py_file_in_dir(d: VirtualPath) -> bool: + return any(f.is_file and f.name.endswith(".py") for f in d.all_paths()) + + +def detect_pycompile_files( + fs_root: VirtualPath, + ctrl: BinaryCtrlAccessor, + context: PackageProcessingContext, +) -> None: + package = context.binary_package.name + # TODO: Support configurable list of private dirs + private_search_dirs = [ + fs_root.lookup(os.path.join(d, package)) + for d in [ + "./usr/share", + "./usr/share/games", + "./usr/lib", + f"./usr/lib/{context.binary_package.deb_multiarch}", + "./usr/lib/games", + ] + ] + private_search_dirs_with_py_files = [ + p for p in private_search_dirs if p is not None and _has_py_file_in_dir(p) + ] + public_search_dirs_has_py_files = any( + p is not None and _has_py_file_in_dir(p) + for p in _public_python_dist_dirs(fs_root) + ) + + if not public_search_dirs_has_py_files and not private_search_dirs_with_py_files: + return + + # The dh_python3 helper also supports -V and -X. We do not use them. They can be + # replaced by bcep support instead, which is how we will be supporting this kind + # of configuration down the line. 
+ ctrl.maintscript.unconditionally_in_script( + "prerm", + textwrap.dedent( + f"""\ + if command -v py3clean >/dev/null 2>&1; then + py3clean -p {package} + else + dpkg -L {package} | sed -En -e '/^(.*)\\/(.+)\\.py$/s,,rm "\\1/__pycache__/\\2".*,e' + find /usr/lib/python3/dist-packages/ -type d -name __pycache__ -empty -print0 | xargs --null --no-run-if-empty rmdir + fi + """ + ), + ) + if public_search_dirs_has_py_files: + ctrl.maintscript.on_configure( + textwrap.dedent( + f"""\ + if command -v py3compile >/dev/null 2>&1; then + py3compile -p {package} + fi + if command -v pypy3compile >/dev/null 2>&1; then + pypy3compile -p {package} || true + fi + """ + ) + ) + for private_dir in private_search_dirs_with_py_files: + escaped_dir = ctrl.maintscript.escape_shell_words(private_dir.absolute) + ctrl.maintscript.on_configure( + textwrap.dedent( + f"""\ + if command -v py3compile >/dev/null 2>&1; then + py3compile -p {package} {escaped_dir} + fi + if command -v pypy3compile >/dev/null 2>&1; then + pypy3compile -p {package} {escaped_dir} || true + fi + """ + ) + ) + + +def translate_capabilities( + fs_root: VirtualPath, + ctrl: BinaryCtrlAccessor, + _context: PackageProcessingContext, +) -> None: + caps = [] + maintscript = ctrl.maintscript + for p in fs_root.all_paths(): + if not p.is_file: + continue + metadata_ref = p.metadata(DebputyCapability) + capability = metadata_ref.value + if capability is None: + continue + + abs_path = maintscript.escape_shell_words(p.absolute) + + cap_script = "".join( + [ + " # Triggered by: {DEFINITION_SOURCE}\n" + " _TPATH=$(dpkg-divert --truename {ABS_PATH})\n", + ' if setcap {CAP} "{DPKG_ROOT_UNQUOTED}${{_TPATH}}"; then\n', + ' chmod {MODE} "{DPKG_ROOT_UNQUOTED}${{_TPATH}}"\n', + ' echo "Successfully applied capabilities {CAP} on ${{_TPATH}}"\n', + " else\n", + # We do not reset the mode here; generally a re-install or upgrade would re-store both mode, + # and remove the capabilities. 
+ ' echo "The setcap failed to processes {CAP} on ${{_TPATH}}; falling back to no capability support" >&2\n', + " fi\n", + ] + ).format( + CAP=maintscript.escape_shell_words(capability.capabilities).replace( + "\\+", "+" + ), + DPKG_ROOT_UNQUOTED=DPKG_ROOT_UNQUOTED, + ABS_PATH=abs_path, + MODE=maintscript.escape_shell_words(str(capability.capability_mode)), + DEFINITION_SOURCE=capability.definition_source.replace("\n", "\\n"), + ) + assert cap_script.endswith("\n") + caps.append(cap_script) + + if not caps: + return + + maintscript.on_configure( + textwrap.dedent( + """\ + if command -v setcap > /dev/null; then + {SET_CAP_COMMANDS} + unset _TPATH + else + echo "The setcap utility is not installed available; falling back to no capability support" >&2 + fi + """ + ).format( + SET_CAP_COMMANDS="".join(caps).rstrip("\n"), + ) + ) + + +def pam_auth_update( + fs_root: VirtualPath, + ctrl: BinaryCtrlAccessor, + _context: PackageProcessingContext, +) -> None: + pam_configs = fs_root.lookup("/usr/share/pam-configs") + if not pam_configs: + return + maintscript = ctrl.maintscript + for pam_config in pam_configs.iterdir: + if not pam_config.is_file: + continue + maintscript.on_configure("pam-auth-update --package\n") + maintscript.on_before_removal( + textwrap.dedent( + f"""\ + if [ "${{DPKG_MAINTSCRIPT_PACKAGE_REFCOUNT:-1}}" = 1 ]; then + pam-auth-update --package --remove {maintscript.escape_shell_words(pam_config.name)} + fi + """ + ) + ) + + +def auto_depends_arch_any_solink( + fs_foot: VirtualPath, + ctrl: BinaryCtrlAccessor, + context: PackageProcessingContext, +) -> None: + package = context.binary_package + if package.is_arch_all: + return + libbasedir = fs_foot.lookup("usr/lib") + if not libbasedir: + return + libmadir = libbasedir.get(package.deb_multiarch) + if libmadir: + libdirs = [libmadir, libbasedir] + else: + libdirs = [libbasedir] + targets = [] + for libdir in libdirs: + for path in libdir.iterdir: + if not path.is_symlink or not path.name.endswith(".so"): 
+ continue + target = path.readlink() + resolved = assume_not_none(path.parent_dir).lookup(target) + if resolved is not None: + continue + targets.append((libdir.path, target)) + + roots = list(context.accessible_package_roots()) + if not roots: + return + + for libdir, target in targets: + final_path = os.path.join(libdir, target) + matches = [] + for opkg, ofs_root in roots: + m = ofs_root.lookup(final_path) + if not m: + continue + matches.append(opkg) + if not matches or len(matches) > 1: + if matches: + all_matches = ", ".join(p.name for p in matches) + _warn( + f"auto-depends-solink: The {final_path} was found in multiple packages ({all_matches}):" + f" Not generating a dependency." + ) + else: + _warn( + f"auto-depends-solink: The {final_path} was NOT found in any accessible package:" + " Not generating a dependency. This detection only works when both packages are arch:any" + " and they have the same build-profiles." + ) + continue + pkg_dep = matches[0] + # The debputy API should not allow this constraint to fail + assert pkg_dep.is_arch_all == package.is_arch_all + # If both packages are arch:all or both are arch:any, we can generate a tight dependency + relation = f"{pkg_dep.name} (= ${{binary:Version}})" + ctrl.substvars.add_dependency("misc:Depends", relation) diff --git a/src/debputy/plugin/debputy/package_processors.py b/src/debputy/plugin/debputy/package_processors.py new file mode 100644 index 0000000..3747755 --- /dev/null +++ b/src/debputy/plugin/debputy/package_processors.py @@ -0,0 +1,317 @@ +import contextlib +import functools +import gzip +import os +import re +import subprocess +from contextlib import ExitStack +from typing import Optional, Iterator, IO, Any, List, Dict, Callable, Union + +from debputy.plugin.api import VirtualPath +from debputy.util import _error, xargs, escape_shell, _info, assume_not_none + + +@contextlib.contextmanager +def _open_maybe_gzip(path: VirtualPath) -> Iterator[Union[IO[bytes], gzip.GzipFile]]: + if 
path.name.endswith(".gz"): + with gzip.GzipFile(path.fs_path, "rb") as fd: + yield fd + else: + with path.open(byte_io=True) as fd: + yield fd + + +_SO_LINK_RE = re.compile(rb"[.]so\s+(.*)\s*") +_LA_DEP_LIB_RE = re.compile(rb"'.+'") + + +def _detect_so_link(path: VirtualPath) -> Optional[str]: + so_link_re = _SO_LINK_RE + with _open_maybe_gzip(path) as fd: + for line in fd: + m = so_link_re.search(line) + if m: + return m.group(1).decode("utf-8") + return None + + +def _replace_with_symlink(path: VirtualPath, so_link_target: str) -> None: + adjusted_target = so_link_target + parent_dir = path.parent_dir + assert parent_dir is not None # For the type checking + if parent_dir.name == os.path.dirname(adjusted_target): + # Avoid man8/../man8/foo links + adjusted_target = os.path.basename(adjusted_target) + elif "/" in so_link_target: + # symlinks and so links have a different base directory when the link has a "/". + # Adjust with an extra "../" to align the result + adjusted_target = "../" + adjusted_target + + path.unlink() + parent_dir.add_symlink(path.name, adjusted_target) + + +@functools.lru_cache(1) +def _has_man_recode() -> bool: + # Ideally, we would just use shutil.which or something like that. + # Unfortunately, in debhelper, we experienced problems with which + # returning "yes" for a man tool that actually could not be run + # on salsa CI. + # + # Therefore, we adopt the logic of dh_installman to run the tool + # with --help to confirm it is not broken, because no one could + # figure out what happened in the salsa CI and my life is still + # too short to figure it out. 
+ try: + subprocess.check_call( + ["man-recode", "--help"], + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + restore_signals=True, + ) + except subprocess.CalledProcessError: + return False + return True + + +def process_manpages(fs_root: VirtualPath, _unused1: Any, _unused2: Any) -> None: + man_dir = fs_root.lookup("./usr/share/man") + if not man_dir: + return + + re_encode = [] + for path in (p for p in man_dir.all_paths() if p.is_file and p.has_fs_path): + size = path.size + if size == 0: + continue + so_link_target = None + if size <= 1024: + # debhelper has a 1024 byte guard on the basis that ".so file tend to be small". + # That guard worked well for debhelper, so lets keep it for now on that basis alone. + so_link_target = _detect_so_link(path) + if so_link_target: + _replace_with_symlink(path, so_link_target) + else: + re_encode.append(path) + + if not re_encode or not _has_man_recode(): + return + + with ExitStack() as manager: + manpages = [ + manager.enter_context(p.replace_fs_path_content()) for p in re_encode + ] + static_cmd = ["man-recode", "--to-code", "UTF-8", "--suffix", ".encoded"] + for cmd in xargs(static_cmd, manpages): + _info(f"Ensuring manpages have utf-8 encoding via: {escape_shell(*cmd)}") + try: + subprocess.check_call( + cmd, + stdin=subprocess.DEVNULL, + restore_signals=True, + ) + except subprocess.CalledProcessError: + _error( + "The man-recode process failed. Please review the output of `man-recode` to understand" + " what went wrong." 
+ ) + for manpage in manpages: + os.rename(f"{manpage}.encoded", manpage) + + +def _filter_compress_paths() -> Callable[[VirtualPath], Iterator[VirtualPath]]: + ignore_dir_basenames = { + "_sources", + } + ignore_basenames = { + ".htaccess", + "index.sgml", + "objects.inv", + "search_index.json", + "copyright", + } + ignore_extensions = { + ".htm", + ".html", + ".xhtml", + ".gif", + ".png", + ".jpg", + ".jpeg", + ".gz", + ".taz", + ".tgz", + ".z", + ".bz2", + ".epub", + ".jar", + ".zip", + ".odg", + ".odp", + ".odt", + ".css", + ".xz", + ".lz", + ".lzma", + ".haddock", + ".hs", + ".woff", + ".woff2", + ".svg", + ".svgz", + ".js", + ".devhelp2", + ".map", # Technically, dh_compress has this one case-sensitive + } + ignore_special_cases = ("-gz", "-z", "_z") + + def _filtered_walk(path: VirtualPath) -> Iterator[VirtualPath]: + for path, children in path.walk(): + if path.name in ignore_dir_basenames: + children.clear() + continue + if path.is_dir and path.name == "examples": + # Ignore anything beneath /usr/share/doc/*/examples + parent = path.parent_dir + grand_parent = parent.parent_dir if parent else None + if grand_parent and grand_parent.absolute == "/usr/share/doc": + children.clear() + continue + name = path.name + if ( + path.is_symlink + or not path.is_file + or name in ignore_basenames + or not path.has_fs_path + ): + continue + + name_lc = name.lower() + _, ext = os.path.splitext(name_lc) + + if ext in ignore_extensions or name_lc.endswith(ignore_special_cases): + continue + yield path + + return _filtered_walk + + +def _find_compressable_paths(fs_root: VirtualPath) -> Iterator[VirtualPath]: + path_filter = _filter_compress_paths() + + for p, compress_size_threshold in ( + ("./usr/share/info", 0), + ("./usr/share/man", 0), + ("./usr/share/doc", 4096), + ): + path = fs_root.lookup(p) + if path is None: + continue + paths = path_filter(path) + if compress_size_threshold: + # The special-case for changelog and NEWS is from dh_compress. 
Generally these files + # have always been compressed regardless of their size. + paths = ( + p + for p in paths + if p.size > compress_size_threshold + or p.name.startswith(("changelog", "NEWS")) + ) + yield from paths + x11_path = fs_root.lookup("./usr/share/fonts/X11") + if x11_path: + yield from ( + p for p in x11_path.all_paths() if p.is_file and p.name.endswith(".pcf") + ) + + +def apply_compression(fs_root: VirtualPath, _unused1: Any, _unused2: Any) -> None: + # TODO: Support hardlinks + compressed_files: Dict[str, str] = {} + for path in _find_compressable_paths(fs_root): + parent_dir = assume_not_none(path.parent_dir) + with parent_dir.add_file(f"{path.name}.gz", mtime=path.mtime) as new_file, open( + new_file.fs_path, "wb" + ) as fd: + try: + subprocess.check_call(["gzip", "-9nc", path.fs_path], stdout=fd) + except subprocess.CalledProcessError: + full_command = f"gzip -9nc {escape_shell(path.fs_path)} > {escape_shell(new_file.fs_path)}" + _error( + f"The compression of {path.path} failed. Please review the error message from gzip to" + f" understand what went wrong. 
Full command was: {full_command}" + ) + compressed_files[path.path] = new_file.path + del parent_dir[path.name] + + all_remaining_symlinks = {p.path: p for p in fs_root.all_paths() if p.is_symlink} + changed = True + while changed: + changed = False + remaining: List[VirtualPath] = list(all_remaining_symlinks.values()) + for symlink in remaining: + target = symlink.readlink() + dir_target, basename_target = os.path.split(target) + new_basename_target = f"{basename_target}.gz" + symlink_parent_dir = assume_not_none(symlink.parent_dir) + dir_path = symlink_parent_dir + if dir_target != "": + dir_path = dir_path.lookup(dir_target) + if ( + not dir_path + or basename_target in dir_path + or new_basename_target not in dir_path + ): + continue + del all_remaining_symlinks[symlink.path] + changed = True + + new_link_name = ( + f"{symlink.name}.gz" + if not symlink.name.endswith(".gz") + else symlink.name + ) + symlink_parent_dir.add_symlink( + new_link_name, os.path.join(dir_target, new_basename_target) + ) + symlink.unlink() + + +def _la_files(fs_root: VirtualPath) -> Iterator[VirtualPath]: + lib_dir = fs_root.lookup("/usr/lib") + if not lib_dir: + return + # Original code only iterators directly in /usr/lib. To be a faithful conversion, we do the same + # here. + # Eagerly resolve the list as the replacement can trigger a runtime error otherwise + paths = list(lib_dir.iterdir) + yield from (p for p in paths if p.is_file and p.name.endswith(".la")) + + +# Conceptually, the same feature that dh_gnome provides. +# The clean_la_files function based on the dh_gnome version written by Luca Falavigna in 2010, +# who in turn references a Makefile version of the feature. 
+# https://salsa.debian.org/gnome-team/gnome-pkg-tools/-/commit/2868e1e41ea45443b0fb340bf4c71c4de87d4a5b +def clean_la_files( + fs_root: VirtualPath, + _unused1: Any, + _unused2: Any, +) -> None: + for path in _la_files(fs_root): + buffer = [] + with path.open(byte_io=True) as fd: + replace_file = False + for line in fd: + if line.startswith(b"dependency_libs"): + replacement = _LA_DEP_LIB_RE.sub(b"''", line) + if replacement != line: + replace_file = True + line = replacement + buffer.append(line) + + if not replace_file: + continue + _info(f"Clearing the dependency_libs line in {path.path}") + with path.replace_fs_path_content() as fs_path, open(fs_path, "wb") as wfd: + wfd.writelines(buffer) diff --git a/src/debputy/plugin/debputy/paths.py b/src/debputy/plugin/debputy/paths.py new file mode 100644 index 0000000..5e512d1 --- /dev/null +++ b/src/debputy/plugin/debputy/paths.py @@ -0,0 +1,4 @@ +GSETTINGS_SCHEMA_DIR = "/usr/share/glib-2.0/schemas" +INITRAMFS_HOOK_DIR = "/usr/share/initramfs-tools/hooks" +SYSTEMD_TMPFILES_DIR = "/usr/lib/tmpfiles.d" +SYSTEMD_SYSUSERS_DIR = "/usr/lib/sysusers.d" diff --git a/src/debputy/plugin/debputy/private_api.py b/src/debputy/plugin/debputy/private_api.py new file mode 100644 index 0000000..2db2b56 --- /dev/null +++ b/src/debputy/plugin/debputy/private_api.py @@ -0,0 +1,2931 @@ +import ctypes +import ctypes.util +import functools +import itertools +import textwrap +import time +from datetime import datetime +from typing import ( + cast, + NotRequired, + Optional, + Tuple, + Union, + Type, + TypedDict, + List, + Annotated, + Any, + Dict, + Callable, +) + +from debian.changelog import Changelog +from debian.deb822 import Deb822 + +from debputy import DEBPUTY_DOC_ROOT_DIR +from debputy._manifest_constants import ( + MK_CONFFILE_MANAGEMENT_X_OWNING_PACKAGE, + MK_CONFFILE_MANAGEMENT_X_PRIOR_TO_VERSION, + MK_INSTALLATIONS_INSTALL_EXAMPLES, + MK_INSTALLATIONS_INSTALL, + MK_INSTALLATIONS_INSTALL_DOCS, + MK_INSTALLATIONS_INSTALL_MAN, + 
MK_INSTALLATIONS_DISCARD, + MK_INSTALLATIONS_MULTI_DEST_INSTALL, +) +from debputy.exceptions import DebputyManifestVariableRequiresDebianDirError +from debputy.installations import InstallRule +from debputy.maintscript_snippet import DpkgMaintscriptHelperCommand +from debputy.manifest_conditions import ( + ManifestCondition, + BinaryPackageContextArchMatchManifestCondition, + BuildProfileMatch, + SourceContextArchMatchManifestCondition, +) +from debputy.manifest_parser.base_types import ( + DebputyParsedContent, + DebputyParsedContentStandardConditional, + FileSystemMode, + StaticFileSystemOwner, + StaticFileSystemGroup, + SymlinkTarget, + FileSystemExactMatchRule, + FileSystemMatchRule, + SymbolicMode, + TypeMapping, + OctalMode, + FileSystemExactNonDirMatchRule, +) +from debputy.manifest_parser.declarative_parser import DebputyParseHint +from debputy.manifest_parser.exceptions import ManifestParseException +from debputy.manifest_parser.mapper_code import type_mapper_str2package +from debputy.manifest_parser.parser_data import ParserContextData +from debputy.manifest_parser.util import AttributePath +from debputy.packages import BinaryPackage +from debputy.path_matcher import ExactFileSystemPath +from debputy.plugin.api import ( + DebputyPluginInitializer, + documented_attr, + reference_documentation, + VirtualPath, + packager_provided_file_reference_documentation, +) +from debputy.plugin.api.impl import DebputyPluginInitializerProvider +from debputy.plugin.api.impl_types import automatic_discard_rule_example, PPFFormatParam +from debputy.plugin.api.spec import ( + type_mapping_reference_documentation, + type_mapping_example, +) +from debputy.plugin.debputy.binary_package_rules import register_binary_package_rules +from debputy.plugin.debputy.discard_rules import ( + _debputy_discard_pyc_files, + _debputy_prune_la_files, + _debputy_prune_doxygen_cruft, + _debputy_prune_binary_debian_dir, + _debputy_prune_info_dir_file, + _debputy_prune_backup_files, + 
_debputy_prune_vcs_paths, +) +from debputy.plugin.debputy.manifest_root_rules import register_manifest_root_rules +from debputy.plugin.debputy.package_processors import ( + process_manpages, + apply_compression, + clean_la_files, +) +from debputy.plugin.debputy.service_management import ( + detect_systemd_service_files, + generate_snippets_for_systemd_units, + detect_sysv_init_service_files, + generate_snippets_for_init_scripts, +) +from debputy.plugin.debputy.shlib_metadata_detectors import detect_shlibdeps +from debputy.plugin.debputy.strip_non_determinism import strip_non_determinism +from debputy.substitution import VariableContext +from debputy.transformation_rules import ( + CreateSymlinkReplacementRule, + TransformationRule, + CreateDirectoryTransformationRule, + RemoveTransformationRule, + MoveTransformationRule, + PathMetadataTransformationRule, + CreateSymlinkPathTransformationRule, +) +from debputy.util import ( + _normalize_path, + PKGNAME_REGEX, + PKGVERSION_REGEX, + debian_policy_normalize_symlink_target, + active_profiles_match, + _error, + _warn, + _info, + assume_not_none, +) + +_DOCUMENTED_DPKG_ARCH_TYPES = { + "HOST": ( + "installed on", + "The package will be **installed** on this type of machine / system", + ), + "BUILD": ( + "compiled on", + "The compilation of this package will be performed **on** this kind of machine / system", + ), + "TARGET": ( + "cross-compiler output", + "When building a cross-compiler, it will produce output for this kind of machine/system", + ), +} + +_DOCUMENTED_DPKG_ARCH_VARS = { + "ARCH": "Debian's name for the architecture", + "ARCH_ABI": "Debian's name for the architecture ABI", + "ARCH_BITS": "Number of bits in the pointer size", + "ARCH_CPU": "Debian's name for the CPU type", + "ARCH_ENDIAN": "Endianness of the architecture (little/big)", + "ARCH_LIBC": "Debian's name for the libc implementation", + "ARCH_OS": "Debian name for the OS/kernel", + "GNU_CPU": "GNU's name for the CPU", + "GNU_SYSTEM": "GNU's name for 
@functools.lru_cache
def load_libcap() -> Tuple[bool, Optional[str], Callable[[str], bool]]:
    """Locate libcap and build a validator for textual capability sets.

    The result is cached, so the library lookup and loading happens at most once.

    :return: A tuple `(has_libcap, cap_library_path, is_valid_cap)` where
      `has_libcap` tells whether the library could be loaded, `cap_library_path`
      is whatever `ctypes.util.find_library` returned (possibly `None`) and
      `is_valid_cap` is a callable that checks a capability string.  When the
      library is unavailable, the callable accepts everything (after emitting a
      one-time informational message).
    """
    cap_library_path = ctypes.util.find_library("cap.so")
    libcap = None
    if cap_library_path:
        try:
            libcap = ctypes.cdll.LoadLibrary(cap_library_path)
        except OSError:
            # Validation is best effort; fall through to the permissive checker.
            pass
    has_libcap = libcap is not None

    if libcap is None:
        warned = False

        def _is_valid_cap(cap: str) -> bool:
            nonlocal warned
            if not warned:
                _info(
                    "Could not load libcap.so; will not validate capabilities. Use `apt install libcap2` to provide"
                    " checking of capabilities."
                )
                warned = True
            return True

    else:
        # cap_t cap_from_text(const char *path_p)
        libcap.cap_from_text.argtypes = [ctypes.c_char_p]
        # cap_t is an opaque pointer.  It must be declared as c_void_p: with
        # c_char_p, ctypes converts the result into a `bytes` copy and the
        # original pointer (which is what cap_free needs) is lost.
        libcap.cap_from_text.restype = ctypes.c_void_p

        libcap.cap_free.argtypes = [ctypes.c_void_p]
        libcap.cap_free.restype = None

        def _is_valid_cap(cap: str) -> bool:
            cap_t = libcap.cap_from_text(cap.encode("utf-8"))
            if cap_t is None:
                # NULL return: libcap could not parse the text; nothing to free.
                return False
            libcap.cap_free(cap_t)
            return True

    return has_libcap, cap_library_path, _is_valid_cap
def _changelog_date_to_epoch(date: Optional[str]) -> Optional[str]:
    """Convert a changelog trailer date into seconds since the epoch (as a string).

    :param date: The date of a changelog entry, or `None` if the entry had none.
    :return: The timestamp as a decimal string, or `None` when the date is
      missing or does not follow the changelog date format.
    """
    if date is None:
        return None
    try:
        parsed = datetime.strptime(date, "%a, %d %b %Y %H:%M:%S %z")
    except ValueError:
        return None
    return str(int(parsed.timestamp()))


def load_source_variables(variable_context: VariableContext) -> Dict[str, str]:
    """Compute the manifest variables derived from `debian/changelog`.

    At most the two newest changelog entries are parsed.  The newest entry
    provides the source name, the version variables and `SOURCE_DATE_EPOCH`.
    When the newest entry is marked `binary-only=yes`, the entry below it
    provides the internal non-binNMU version and date variables.

    :param variable_context: Context providing access to the `debian` directory.
    :return: Mapping of variable name to value.
    :raises DebputyManifestVariableRequiresDebianDirError: If the changelog
      cannot be found.
    """
    try:
        changelog = variable_context.debian_dir.lookup("changelog")
        if changelog is None:
            raise DebputyManifestVariableRequiresDebianDirError(
                "The changelog was not present"
            )
        with changelog.open() as fd:
            dch = Changelog(fd, max_blocks=2)
    except FileNotFoundError as e:
        raise DebputyManifestVariableRequiresDebianDirError(
            "The changelog was not present"
        ) from e
    first_entry = dch[0]
    first_non_binnmu_entry = dch[0]
    if first_non_binnmu_entry.other_pairs.get("binary-only", "no") == "yes":
        first_non_binnmu_entry = dch[1]
        assert first_non_binnmu_entry.other_pairs.get("binary-only", "no") == "no"
    source_version = first_entry.version
    epoch = source_version.epoch
    upstream_version = source_version.upstream_version
    debian_revision = source_version.debian_revision
    epoch_upstream = upstream_version
    upstream_debian_revision = upstream_version
    if epoch is not None and epoch != "":
        epoch_upstream = f"{epoch}:{upstream_version}"
    if debian_revision is not None and debian_revision != "":
        upstream_debian_revision = f"{upstream_version}-{debian_revision}"

    package = first_entry.package
    if package is None:
        _error("Cannot determine the source package name from debian/changelog.")

    # A missing *or malformed* date falls back to the current time, as the
    # warning text promises, instead of failing with a ValueError.
    source_date_epoch = _changelog_date_to_epoch(first_entry.date)
    if source_date_epoch is None:
        _warn(
            "The latest changelog entry does not have a (parsable) date, using current time"
            " for SOURCE_DATE_EPOCH"
        )
        source_date_epoch = str(int(time.time()))

    if first_non_binnmu_entry is not first_entry:
        snd_source_date_epoch = _changelog_date_to_epoch(first_non_binnmu_entry.date)
        if snd_source_date_epoch is None:
            _warn(
                "The latest (non-binNMU) changelog entry does not have a (parsable) date, using current time"
                " for SOURCE_DATE_EPOCH (for strip-nondeterminism)"
            )
            # NOTE(review): this also replaces SOURCE_DATE_EPOCH, exactly as before;
            # presumably so the two values stay in sync - confirm this is intended.
            snd_source_date_epoch = source_date_epoch = str(int(time.time()))
    else:
        snd_source_date_epoch = source_date_epoch
    return {
        "DEB_SOURCE": package,
        "DEB_VERSION": source_version.full_version,
        "DEB_VERSION_EPOCH_UPSTREAM": epoch_upstream,
        "DEB_VERSION_UPSTREAM_REVISION": upstream_debian_revision,
        "DEB_VERSION_UPSTREAM": upstream_version,
        "SOURCE_DATE_EPOCH": source_date_epoch,
        "_DEBPUTY_INTERNAL_NON_BINNMU_SOURCE": str(first_non_binnmu_entry.version),
        "_DEBPUTY_SND_SOURCE_DATE_EPOCH": snd_source_date_epoch,
    }
+ + The match will be read as one of the following cases: + + - Exact path match if there is no globs characters like `usr/bin/debputy` + - A basename glob like `*.txt` or `**/foo` + - A generic path glob otherwise like `usr/lib/*.so*` + + Except for basename globs, all matches are always relative to the root directory of + the match, which is typically the package root directory or a search directory. + + For basename globs, any path matching that basename beneath the package root directory + or relevant search directories will match. + + Please keep in mind that: + + * glob patterns often have to be quoted as YAML interpret the glob metacharacter as + an anchor reference. + + * Directories can be matched via this type. Whether the rule using this type + recurse into the directory depends on the usage and not this type. Related, if + value for this rule ends with a literal "/", then the definition can *only* match + directories (similar to the shell). + + * path matches involving glob expansion are often subject to different rules than + path matches without them. As an example, automatic discard rules does not apply + to exact path matches, but they will filter out glob matches. + """, + ), + examples=[ + type_mapping_example("usr/bin/debputy"), + type_mapping_example("*.txt"), + type_mapping_example("**/foo"), + type_mapping_example("usr/lib/*.so*"), + type_mapping_example("usr/share/foo/data-*/"), + ], + ), + ) + + api.register_mapped_type( + TypeMapping( + FileSystemExactMatchRule, + str, + FileSystemExactMatchRule.parse_path_match, + ), + reference_documentation=type_mapping_reference_documentation( + description=textwrap.dedent( + """\ + A file system match that does **not** expand globs. + + Manifest variable substitution will be applied. However, globs will not be expanded. + Any glob metacharacters will be interpreted as a literal part of path. + + Note that a directory can be matched via this type. 
Whether the rule using this type + recurse into the directory depends on the usage and is not defined by this type. + Related, if value for this rule ends with a literal "/", then the definition can + *only* match directories (similar to the shell). + """, + ), + examples=[ + type_mapping_example("usr/bin/dpkg"), + type_mapping_example("usr/share/foo/"), + type_mapping_example("usr/share/foo/data.txt"), + ], + ), + ) + + api.register_mapped_type( + TypeMapping( + FileSystemExactNonDirMatchRule, + str, + FileSystemExactNonDirMatchRule.parse_path_match, + ), + reference_documentation=type_mapping_reference_documentation( + description=textwrap.dedent( + f"""\ + A file system match that does **not** expand globs and must not match a directory. + + Manifest variable substitution will be applied. However, globs will not be expanded. + Any glob metacharacters will be interpreted as a literal part of path. + + This is like {FileSystemExactMatchRule.__name__} except that the match will fail if the + provided path matches a directory. Since a directory cannot be matched, it is an error + for any input to end with a "/" as only directories can be matched if the path ends + with a "/". + """, + ), + examples=[ + type_mapping_example("usr/bin/dh_debputy"), + type_mapping_example("usr/share/foo/data.txt"), + ], + ), + ) + + api.register_mapped_type( + TypeMapping( + SymlinkTarget, + str, + lambda v, ap, pc: SymlinkTarget.parse_symlink_target( + v, ap, assume_not_none(pc).substitution + ), + ), + reference_documentation=type_mapping_reference_documentation( + description=textwrap.dedent( + """\ + A symlink target. + + Manifest variable substitution will be applied. 
This is distinct from an exact file + system match in that a symlink target is not relative to the package root by default + (explicitly prefix for "/" for absolute path targets) + + Note that `debputy` will policy normalize symlinks when assembling the deb, so + use of relative or absolute symlinks comes down to preference. + """, + ), + examples=[ + type_mapping_example("../foo"), + type_mapping_example("/usr/share/doc/bar"), + ], + ), + ) + + api.register_mapped_type( + TypeMapping( + StaticFileSystemOwner, + Union[int, str], + lambda v, ap, _: StaticFileSystemOwner.from_manifest_value(v, ap), + ), + reference_documentation=type_mapping_reference_documentation( + description=textwrap.dedent( + """\ + File system owner reference that is part of the passwd base data (such as "root"). + + The group can be provided in either of the following three forms: + + * A name (recommended), such as "root" + * The UID in the form of an integer (that is, no quoting), such as 0 (for "root") + * The name and the UID separated by colon such as "root:0" (for "root"). + + Note in the last case, the `debputy` will validate that the name and the UID match. + + Some owners (such as "nobody") are deliberately disallowed. + """ + ), + examples=[ + type_mapping_example("root"), + type_mapping_example(0), + type_mapping_example("root:0"), + type_mapping_example("bin"), + ], + ), + ) + api.register_mapped_type( + TypeMapping( + StaticFileSystemGroup, + Union[int, str], + lambda v, ap, _: StaticFileSystemGroup.from_manifest_value(v, ap), + ), + reference_documentation=type_mapping_reference_documentation( + description=textwrap.dedent( + """\ + File system group reference that is part of the passwd base data (such as "root"). 
+ + The group can be provided in either of the following three forms: + + * A name (recommended), such as "root" + * The GID in the form of an integer (that is, no quoting), such as 0 (for "root") + * The name and the GID separated by colon such as "root:0" (for "root"). + + Note in the last case, the `debputy` will validate that the name and the GID match. + + Some owners (such as "nobody") are deliberately disallowed. + """ + ), + examples=[ + type_mapping_example("root"), + type_mapping_example(0), + type_mapping_example("root:0"), + type_mapping_example("tty"), + ], + ), + ) + + api.register_mapped_type( + TypeMapping( + BinaryPackage, + str, + type_mapper_str2package, + ), + reference_documentation=type_mapping_reference_documentation( + description="Name of a package in debian/control", + ), + ) + + api.register_mapped_type( + TypeMapping( + FileSystemMode, + str, + lambda v, ap, _: FileSystemMode.parse_filesystem_mode(v, ap), + ), + reference_documentation=type_mapping_reference_documentation( + description="Either an octal mode or symbolic mode", + examples=[ + type_mapping_example("a+x"), + type_mapping_example("u=rwX,go=rX"), + type_mapping_example("0755"), + ], + ), + ) + api.register_mapped_type( + TypeMapping( + OctalMode, + str, + lambda v, ap, _: OctalMode.parse_filesystem_mode(v, ap), + ), + reference_documentation=type_mapping_reference_documentation( + description="An octal mode. 
def register_service_managers(
    api: DebputyPluginInitializerProvider,
) -> None:
    """Register the built-in service managers with the plugin API.

    Each service manager is registered via `api.service_provider` with its
    name, its service file detector and its maintscript snippet generator.

    :param api: The plugin initializer to register the service managers with.
    """
    builtin_service_managers = (
        (
            "systemd",
            detect_systemd_service_files,
            generate_snippets_for_systemd_units,
        ),
        (
            "sysvinit",
            detect_sysv_init_service_files,
            generate_snippets_for_init_scripts,
        ),
    )
    for manager_name, detector, snippet_generator in builtin_service_managers:
        api.service_provider(manager_name, detector, snippet_generator)
def register_processing_steps(api: DebputyPluginInitializerProvider) -> None:
    """Register the built-in package processors and their ordering constraints.

    The processors are registered in this order: `manpages`, `clean-la-files`,
    `strip-nondeterminism` (declared to depend on `manpages`) and `compression`
    (declared to depend on `manpages` and `strip-nondeterminism`).

    :param api: The plugin initializer to register the processors with.
    """
    add_processor = api.package_processor

    add_processor("manpages", process_manpages)
    add_processor("clean-la-files", clean_la_files)

    # strip-non-determinism makes assumptions about the PackageProcessingContext implementation
    snd_processor = cast("Any", strip_non_determinism)
    add_processor(
        "strip-nondeterminism",
        snd_processor,
        depends_on_processor=["manpages"],
    )

    compression_dependencies = ["manpages", "strip-nondeterminism"]
    add_processor(
        "compression",
        apply_compression,
        depends_on_processor=compression_dependencies,
    )
from the top most changelog entry *without* the Debian revision", + "DEB_VERSION_UPSTREAM_REVISION": "Version from the top most changelog entry *without* the epoch", + "DEB_VERSION_UPSTREAM": "Upstream version from the top most changelog entry (that is, *without* epoch and Debian revision)", + "SOURCE_DATE_EPOCH": textwrap.dedent( + """\ + Timestamp from the top most changelog entry (`dpkg-parsechangelog -STimestamp`) + Please see https://reproducible-builds.org/docs/source-date-epoch/ for the full definition of + this variable. + """ + ), + "_DEBPUTY_INTERNAL_NON_BINNMU_SOURCE": None, + "_DEBPUTY_SND_SOURCE_DATE_EPOCH": None, + }, + ) + + +def document_builtin_variables(api: DebputyPluginInitializerProvider) -> None: + api.document_builtin_variable( + "PACKAGE", + "Name of the binary package (only available in binary context)", + is_context_specific=True, + ) + + arch_types = _DOCUMENTED_DPKG_ARCH_TYPES + + for arch_type, (arch_type_tag, arch_type_doc) in arch_types.items(): + for arch_var, arch_var_doc in _DOCUMENTED_DPKG_ARCH_VARS.items(): + full_var = f"DEB_{arch_type}_{arch_var}" + documentation = textwrap.dedent( + f"""\ + {arch_var_doc} ({arch_type_tag}) + This variable describes machine information used when the package is compiled and assembled. 
def _format_docbase_filename(
    path_format: str,
    format_param: PPFFormatParam,
    docbase_file: VirtualPath,
) -> str:
    """Compute the install path of a doc-base file from its `Document` field.

    The doc-base file is parsed as a deb822 paragraph.  When it has a `Document`
    field, that value replaces the `name` entry of `format_param` (the mapping
    is updated in place).  Otherwise a warning is emitted and the existing
    `name` entry is used.

    :param path_format: Format string for the path (expanded with `str.format`).
    :param format_param: The format parameters; `name` may be overwritten.
    :param docbase_file: The doc-base file provided by the packager.
    :return: `path_format` expanded with the (possibly updated) parameters.
    """
    with docbase_file.open() as fd:
        content = Deb822(fd)
        # Use .get(): subscripting raises KeyError for an absent field, which
        # would make the warning branch below unreachable.
        proper_name = content.get("Document")
        if proper_name is not None:
            format_param["name"] = proper_name
        else:
            _warn(
                f"The docbase file {docbase_file.fs_path} is missing the Document field"
            )
    return path_format.format(**format_param)
source_format=_with_alt_form(ParsedInstallRuleSourceFormat), + inline_reference_documentation=reference_documentation( + title="Generic install (`install`)", + description=textwrap.dedent( + """\ + The generic `install` rule can be used to install arbitrary paths into packages + and is *similar* to how `dh_install` from debhelper works. It is a two "primary" uses. + + 1) The classic "install into directory" similar to the standard `dh_install` + 2) The "install as" similar to `dh-exec`'s `foo => bar` feature. + + The `install` rule installs a path exactly once into each package it acts on. In + the rare case that you want to install the same source *multiple* times into the + *same* packages, please have a look at `{MULTI_DEST_INSTALL}`. + """.format( + MULTI_DEST_INSTALL=MK_INSTALLATIONS_MULTI_DEST_INSTALL + ) + ), + non_mapping_description=textwrap.dedent( + """\ + When the input is a string or a list of string, then that value is used as shorthand + for `source` or `sources` (respectively). This form can only be used when `into` is + not required. + """ + ), + attributes=[ + documented_attr( + ["source", "sources"], + textwrap.dedent( + """\ + A path match (`source`) or a list of path matches (`sources`) defining the + source path(s) to be installed. The path match(es) can use globs. Each match + is tried against default search directories. + - When a symlink is matched, then the symlink (not its target) is installed + as-is. When a directory is matched, then the directory is installed along + with all the contents that have not already been installed somewhere. + """ + ), + ), + documented_attr( + "dest_dir", + textwrap.dedent( + """\ + A path defining the destination *directory*. The value *cannot* use globs, but can + use substitution. If neither `as` nor `dest-dir` is given, then `dest-dir` defaults + to the directory name of the `source`. 
+ """ + ), + ), + documented_attr( + "into", + textwrap.dedent( + """\ + Either a package name or a list of package names for which these paths should be + installed. This key is conditional on whether there are multiple binary packages listed + in `debian/control`. When there is only one binary package, then that binary is the + default for `into`. Otherwise, the key is required. + """ + ), + ), + documented_attr( + "install_as", + textwrap.dedent( + """\ + A path defining the path to install the source as. This is a full path. This option + is mutually exclusive with `dest-dir` and `sources` (but not `source`). When `as` is + given, then `source` must match exactly one "not yet matched" path. + """ + ), + ), + documented_attr( + "when", + textwrap.dedent( + """\ + A condition as defined in [Conditional rules]({MANIFEST_FORMAT_DOC}#Conditional rules). + """ + ), + ), + ], + reference_documentation_url=_manifest_format_doc("generic-install-install"), + ), + ) + api.plugable_manifest_rule( + InstallRule, + [ + MK_INSTALLATIONS_INSTALL_DOCS, + "install-doc", + ], + ParsedInstallRule, + _install_docs_rule_handler, + source_format=_with_alt_form(ParsedInstallDocRuleSourceFormat), + inline_reference_documentation=reference_documentation( + title="Install documentation (`install-docs`)", + description=textwrap.dedent( + """\ + This install rule resemble that of `dh_installdocs`. It is a shorthand over the generic + `install` rule with the following key features: + + 1) The default `dest-dir` is to use the package's documentation directory (usually something + like `/usr/share/doc/{{PACKAGE}}`, though it respects the "main documentation package" + recommendation from Debian Policy). The `dest-dir` or `as` can be set in case the + documentation in question goes into another directory or with a concrete path. In this + case, it is still "better" than `install` due to the remaining benefits. 
+ 2) The rule comes with pre-defined conditional logic for skipping the rule under + `DEB_BUILD_OPTIONS=nodoc`, so you do not have to write that conditional yourself. + 3) The `into` parameter can be omitted as long as there is a exactly one non-`udeb` + package listed in `debian/control`. + + With these two things in mind, it behaves just like the `install` rule. + + Note: It is often worth considering to use a more specialized version of the `install-docs` + rule when one such is available. If you are looking to install an example or a manpage, + consider whether `install-examples` or `install-man` might be a better fit for your + use-case. + """ + ), + non_mapping_description=textwrap.dedent( + """\ + When the input is a string or a list of string, then that value is used as shorthand + for `source` or `sources` (respectively). This form can only be used when `into` is + not required. + """ + ), + attributes=[ + documented_attr( + ["source", "sources"], + textwrap.dedent( + """\ + A path match (`source`) or a list of path matches (`sources`) defining the + source path(s) to be installed. The path match(es) can use globs. Each match + is tried against default search directories. + - When a symlink is matched, then the symlink (not its target) is installed + as-is. When a directory is matched, then the directory is installed along + with all the contents that have not already been installed somewhere. + + - **CAVEAT**: Specifying `source: examples` where `examples` resolves to a + directory for `install-examples` will give you an `examples/examples` + directory in the package, which is rarely what you want. Often, you + can solve this by using `examples/*` instead. Similar for `install-docs` + and a `doc` or `docs` directory. + """ + ), + ), + documented_attr( + "dest_dir", + textwrap.dedent( + """\ + A path defining the destination *directory*. The value *cannot* use globs, but can + use substitution. 
If neither `as` nor `dest-dir` is given, then `dest-dir` defaults + to the relevant package documentation directory (a la `/usr/share/doc/{{PACKAGE}}`). + """ + ), + ), + documented_attr( + "into", + textwrap.dedent( + """\ + Either a package name or a list of package names for which these paths should be + installed as documentation. This key is conditional on whether there are multiple + (non-`udeb`) binary packages listed in `debian/control`. When there is only one + (non-`udeb`) binary package, then that binary is the default for `into`. Otherwise, + the key is required. + """ + ), + ), + documented_attr( + "install_as", + textwrap.dedent( + """\ + A path defining the path to install the source as. This is a full path. This option + is mutually exclusive with `dest-dir` and `sources` (but not `source`). When `as` is + given, then `source` must match exactly one "not yet matched" path. + """ + ), + ), + documented_attr( + "when", + textwrap.dedent( + """\ + A condition as defined in [Conditional rules]({MANIFEST_FORMAT_DOC}#Conditional rules). + This condition will be combined with the built-in condition provided by these rules + (rather than replacing it). + """ + ), + ), + ], + reference_documentation_url=_manifest_format_doc( + "install-documentation-install-docs" + ), + ), + ) + api.plugable_manifest_rule( + InstallRule, + [ + MK_INSTALLATIONS_INSTALL_EXAMPLES, + "install-example", + ], + ParsedInstallExamplesRule, + _install_examples_rule_handler, + source_format=_with_alt_form(ParsedInstallExamplesRuleSourceFormat), + inline_reference_documentation=reference_documentation( + title="Install examples (`install-examples`)", + description=textwrap.dedent( + """\ + This install rule resemble that of `dh_installexamples`. It is a shorthand over the generic ` + install` rule with the following key features: + + 1) It pre-defines the `dest-dir` that respects the "main documentation package" recommendation from + Debian Policy. 
The `install-examples` will use the `examples` subdir for the package documentation + dir. + 2) The rule comes with pre-defined conditional logic for skipping the rule under + `DEB_BUILD_OPTIONS=nodoc`, so you do not have to write that conditional yourself. + 3) The `into` parameter can be omitted as long as there is a exactly one non-`udeb` + package listed in `debian/control`. + + With these two things in mind, it behaves just like the `install` rule. + """ + ), + non_mapping_description=textwrap.dedent( + """\ + When the input is a string or a list of string, then that value is used as shorthand + for `source` or `sources` (respectively). This form can only be used when `into` is + not required. + """ + ), + attributes=[ + documented_attr( + ["source", "sources"], + textwrap.dedent( + """\ + A path match (`source`) or a list of path matches (`sources`) defining the + source path(s) to be installed. The path match(es) can use globs. Each match + is tried against default search directories. + - When a symlink is matched, then the symlink (not its target) is installed + as-is. When a directory is matched, then the directory is installed along + with all the contents that have not already been installed somewhere. + + - **CAVEAT**: Specifying `source: examples` where `examples` resolves to a + directory for `install-examples` will give you an `examples/examples` + directory in the package, which is rarely what you want. Often, you + can solve this by using `examples/*` instead. Similar for `install-docs` + and a `doc` or `docs` directory. + """ + ), + ), + documented_attr( + "into", + textwrap.dedent( + """\ + Either a package name or a list of package names for which these paths should be + installed as examples. This key is conditional on whether there are (non-`udeb`) + multiple binary packages listed in `debian/control`. When there is only one + (non-`udeb`) binary package, then that binary is the default for `into`. + Otherwise, the key is required. 
+ """ + ), + ), + documented_attr( + "when", + textwrap.dedent( + """\ + A condition as defined in [Conditional rules]({MANIFEST_FORMAT_DOC}#Conditional rules). + This condition will be combined with the built-in condition provided by these rules + (rather than replacing it). + """ + ), + ), + ], + reference_documentation_url=_manifest_format_doc( + "install-examples-install-examples" + ), + ), + ) + api.plugable_manifest_rule( + InstallRule, + MK_INSTALLATIONS_INSTALL_MAN, + ParsedInstallManpageRule, + _install_man_rule_handler, + source_format=_with_alt_form(ParsedInstallManpageRuleSourceFormat), + inline_reference_documentation=reference_documentation( + title="Install manpages (`install-man`)", + description=textwrap.dedent( + """\ + Install rule for installing manpages similar to `dh_installman`. It is a shorthand + over the generic `install` rule with the following key features: + + 1) The rule can only match files (notably, symlinks cannot be matched by this rule). + 2) The `dest-dir` is computed per source file based on the manpage's section and + language. + 3) The `into` parameter can be omitted as long as there is a exactly one non-`udeb` + package listed in `debian/control`. + 4) The rule comes with manpage specific attributes such as `language` and `section` + for when the auto-detection is insufficient. + 5) The rule comes with pre-defined conditional logic for skipping the rule under + `DEB_BUILD_OPTIONS=nodoc`, so you do not have to write that conditional yourself. + + With these things in mind, the rule behaves similar to the `install` rule. + """ + ), + non_mapping_description=textwrap.dedent( + """\ + When the input is a string or a list of string, then that value is used as shorthand + for `source` or `sources` (respectively). This form can only be used when `into` is + not required. 
+ """ + ), + attributes=[ + documented_attr( + ["source", "sources"], + textwrap.dedent( + """\ + A path match (`source`) or a list of path matches (`sources`) defining the + source path(s) to be installed. The path match(es) can use globs. Each match + is tried against default search directories. + - When a symlink is matched, then the symlink (not its target) is installed + as-is. When a directory is matched, then the directory is installed along + with all the contents that have not already been installed somewhere. + """ + ), + ), + documented_attr( + "into", + textwrap.dedent( + """\ + Either a package name or a list of package names for which these paths should be + installed as manpages. This key is conditional on whether there are multiple (non-`udeb`) + binary packages listed in `debian/control`. When there is only one (non-`udeb`) binary + package, then that binary is the default for `into`. Otherwise, the key is required. + """ + ), + ), + documented_attr( + "section", + textwrap.dedent( + """\ + If provided, it must be an integer between 1 and 9 (both inclusive), defining the + section the manpages belong overriding any auto-detection that `debputy` would + have performed. + """ + ), + ), + documented_attr( + "language", + textwrap.dedent( + """\ + If provided, it must be either a 2 letter language code (such as `de`), a 5 letter + language + dialect code (such as `pt_BR`), or one of the special keywords `C`, + `derive-from-path`, or `derive-from-basename`. The default is `derive-from-path`. + - When `language` is `C`, then the manpages are assumed to be "untranslated". + - When `language` is a language code (with or without dialect), then all manpages + matched will be assumed to be translated to that concrete language / dialect. + - When `language` is `derive-from-path`, then `debputy` attempts to derive the + language from the path (`man/<language>/man<section>`). This matches the + default of `dh_installman`. 
When no language can be found for a given source, + `debputy` behaves like language was `C`. + - When `language` is `derive-from-basename`, then `debputy` attempts to derive + the language from the basename (`foo.<language>.1`) similar to `dh_installman` + previous default. When no language can be found for a given source, `debputy` + behaves like language was `C`. Note this is prone to false positives where + `.pl`, `.so` or similar two-letter extensions gets mistaken for a language code + (`.pl` can both be "Polish" or "Perl Script", `.so` can both be "Somali" and + "Shared Object" documentation). In this configuration, such extensions are + always assumed to be a language. + """ + ), + ), + documented_attr( + "when", + textwrap.dedent( + """\ + A condition as defined in [Conditional rules]({MANIFEST_FORMAT_DOC}#Conditional rules). + """ + ), + ), + ], + reference_documentation_url=_manifest_format_doc( + "install-manpages-install-man" + ), + ), + ) + api.plugable_manifest_rule( + InstallRule, + MK_INSTALLATIONS_DISCARD, + ParsedInstallDiscardRule, + _install_discard_rule_handler, + source_format=_with_alt_form(ParsedInstallDiscardRuleSourceFormat), + inline_reference_documentation=reference_documentation( + title="Discard (or exclude) upstream provided paths (`discard`)", + description=textwrap.dedent( + """\ + When installing paths from `debian/tmp` into packages, it might be useful to ignore + some paths that you never need installed. This can be done with the `discard` rule. + + Once a path is discarded, it cannot be matched by any other install rules. A path + that is discarded, is considered handled when `debputy` checks for paths you might + have forgotten to install. The `discard` feature is therefore *also* replaces the + `debian/not-installed` file used by `debhelper` and `cdbs`. 
+ """ + ), + non_mapping_description=textwrap.dedent( + """\ + When the input is a string or a list of string, then that value is used as shorthand + for `path` or `paths` (respectively). + """ + ), + attributes=[ + documented_attr( + ["path", "paths"], + textwrap.dedent( + """\ + A path match (`path`) or a list of path matches (`paths`) defining the source + path(s) that should not be installed anywhere. The path match(es) can use globs. + - When a symlink is matched, then the symlink (not its target) is discarded as-is. + When a directory is matched, then the directory is discarded along with all the + contents that have not already been installed somewhere. + """ + ), + ), + documented_attr( + ["search_dir", "search_dirs"], + textwrap.dedent( + """\ + A path (`search-dir`) or a list to paths (`search-dirs`) that defines + which search directories apply to. This attribute is primarily useful + for source packages that uses "per package search dirs", and you want + to restrict a discard rule to a subset of the relevant search dirs. + Note all listed search directories must be either an explicit search + requested by the packager or a search directory that `debputy` + provided automatically (such as `debian/tmp`). Listing other paths + will make `debputy` report an error. + - Note that the `path` or `paths` must match at least one entry in + any of the search directories unless *none* of the search directories + exist (or the condition in `required-when` evaluates to false). When + none of the search directories exist, the discard rule is silently + skipped. This special-case enables you to have discard rules only + applicable to certain builds that are only performed conditionally. + """ + ), + ), + documented_attr( + "required_when", + textwrap.dedent( + """\ + A condition as defined in [Conditional rules](#conditional-rules). The discard + rule is always applied. 
When the conditional is present and evaluates to false, + the discard rule can silently match nothing.When the condition is absent, *or* + it evaluates to true, then each pattern provided must match at least one path. + """ + ), + ), + ], + reference_documentation_url=_manifest_format_doc( + "discard-or-exclude-upstream-provided-paths-discard" + ), + ), + ) + api.plugable_manifest_rule( + InstallRule, + MK_INSTALLATIONS_MULTI_DEST_INSTALL, + ParsedMultiDestInstallRule, + _multi_dest_install_rule_handler, + source_format=ParsedMultiDestInstallRuleSourceFormat, + inline_reference_documentation=reference_documentation( + title=f"Multi destination install (`{MK_INSTALLATIONS_MULTI_DEST_INSTALL}`)", + description=textwrap.dedent( + """\ + The `{RULE_NAME}` is a variant of the generic `install` rule that installs sources + into multiple destination paths. This is needed for the rare case where you want a + path to be installed *twice* (or more) into the *same* package. The rule is a two + "primary" uses. + + 1) The classic "install into directory" similar to the standard `dh_install`, + except you list 2+ destination directories. + 2) The "install as" similar to `dh-exec`'s `foo => bar` feature, except you list + 2+ `as` names. + """.format( + RULE_NAME=MK_INSTALLATIONS_MULTI_DEST_INSTALL + ) + ), + attributes=[ + documented_attr( + ["source", "sources"], + textwrap.dedent( + """\ + A path match (`source`) or a list of path matches (`sources`) defining the + source path(s) to be installed. The path match(es) can use globs. Each match + is tried against default search directories. + - When a symlink is matched, then the symlink (not its target) is installed + as-is. When a directory is matched, then the directory is installed along + with all the contents that have not already been installed somewhere. + """ + ), + ), + documented_attr( + "dest_dirs", + textwrap.dedent( + """\ + A list of paths defining the destination *directories*. 
def register_transformation_rules(api: DebputyPluginInitializerProvider) -> None:
    """Register the built-in transformation rules with the plugin API.

    Each `api.plugable_manifest_rule` call below registers one manifest
    keyword under the `TransformationRule` rule type together with its
    parsed format, its handler and its inline reference documentation.
    The keywords registered here are `move`, `remove`, `create-symlink`,
    `path-metadata` and `create-directories`.

    :param api: The plugin initializer used to register the rules.
    """
    # NOTE: The doc strings contain `{MANIFEST_FORMAT_DOC}` and `{{...}}`
    # placeholders; presumably they are run through `str.format` by the
    # documentation helpers - keep the brace escaping intact.
    api.plugable_manifest_rule(
        TransformationRule,
        "move",
        TransformationMoveRuleSpec,
        _transformation_move_handler,
        inline_reference_documentation=reference_documentation(
            title="Move transformation rule (`move`)",
            description=textwrap.dedent(
                """\
                The move transformation rule is mostly only useful for single binary source packages,
                where everything from upstream's build system is installed automatically into the package.
                In those cases, you might find yourself with some files that need to be renamed to match
                Debian specific requirements.

                This can be done with the `move` transformation rule, which is a rough emulation of the
                `mv` command line tool.
                """
            ),
            attributes=[
                documented_attr(
                    "source",
                    textwrap.dedent(
                        """\
                        A path match defining the source path(s) to be renamed. The value can use globs
                        and substitutions.
                        """
                    ),
                ),
                documented_attr(
                    "target",
                    textwrap.dedent(
                        """\
                        A path defining the target path. The value *cannot* use globs, but can use
                        substitution. If the target ends with a literal `/` (prior to substitution),
                        the target will *always* be a directory.
                        """
                    ),
                ),
                documented_attr(
                    "when",
                    textwrap.dedent(
                        """\
                        A condition as defined in [Conditional rules]({MANIFEST_FORMAT_DOC}#Conditional rules).
                        """
                    ),
                ),
            ],
            reference_documentation_url=_manifest_format_doc(
                "move-transformation-rule-move"
            ),
        ),
    )
    api.plugable_manifest_rule(
        TransformationRule,
        "remove",
        TransformationRemoveRuleSpec,
        _transformation_remove_handler,
        source_format=_with_alt_form(TransformationRemoveRuleInputFormat),
        inline_reference_documentation=reference_documentation(
            title="Remove transformation rule (`remove`)",
            description=textwrap.dedent(
                """\
                The remove transformation rule is mostly only useful for single binary source packages,
                where everything from upstream's build system is installed automatically into the package.
                In those cases, you might find yourself with some files that are _not_ relevant for the
                Debian package (but would be relevant for other distros or for non-distro local builds).
                Common examples include `INSTALL` files or `LICENSE` files (when they are just a subset
                of `debian/copyright`).

                In the manifest, you can ask `debputy` to remove paths from the debian package by using
                the `remove` transformation rule.

                Note that `remove` removes paths from future glob matches and transformation rules.
                """
            ),
            non_mapping_description=textwrap.dedent(
                """\
                When the input is a string or a list of string, then that value is used as shorthand
                for `path` or `paths` (respectively).
                """
            ),
            attributes=[
                documented_attr(
                    ["path", "paths"],
                    textwrap.dedent(
                        """\
                        A path match (`path`) or a list of path matches (`paths`) defining the
                        path(s) inside the package that should be removed. The path match(es)
                        can use globs.
                        - When a symlink is matched, then the symlink (not its target) is removed
                          as-is. When a directory is matched, then the directory is removed
                          along with all the contents.
                        """
                    ),
                ),
                documented_attr(
                    "keep_empty_parent_dirs",
                    textwrap.dedent(
                        """\
                        A boolean determining whether to prune parent directories that become
                        empty as a consequence of this rule. When provided and `true`, this
                        rule will leave empty directories behind. Otherwise, if this rule
                        causes a directory to become empty that directory will be removed.
                        """
                    ),
                ),
                # NOTE(review): the "built-in condition" sentence matches the
                # wording used for the install-docs/examples rules; confirm
                # that `remove` really has a built-in condition.
                documented_attr(
                    "when",
                    textwrap.dedent(
                        """\
                        A condition as defined in [Conditional rules]({MANIFEST_FORMAT_DOC}#Conditional rules).
                        This condition will be combined with the built-in condition provided by these rules
                        (rather than replacing it).
                        """
                    ),
                ),
            ],
            reference_documentation_url=_manifest_format_doc(
                "remove-transformation-rule-remove"
            ),
        ),
    )
    api.plugable_manifest_rule(
        TransformationRule,
        "create-symlink",
        CreateSymlinkRule,
        _transformation_create_symlink,
        inline_reference_documentation=reference_documentation(
            title="Create symlinks transformation rule (`create-symlink`)",
            description=textwrap.dedent(
                """\
                Often, the upstream build system will provide the symlinks for you. However,
                in some cases, it is useful for the packager to define distribution specific
                symlinks. This can be done via the `create-symlink` transformation rule.
                """
            ),
            attributes=[
                documented_attr(
                    "path",
                    textwrap.dedent(
                        """\
                        The path that should be a symlink. The path may contain substitution
                        variables such as `{{DEB_HOST_MULTIARCH}}` but _cannot_ use globs.
                        Parent directories are implicitly created as necessary.
                        * Note that if `path` already exists, the behaviour of this
                          transformation depends on the value of `replacement-rule`.
                        """
                    ),
                ),
                documented_attr(
                    "target",
                    textwrap.dedent(
                        """\
                        Where the symlink should point to. The target may contain substitution
                        variables such as `{{DEB_HOST_MULTIARCH}}` but _cannot_ use globs.
                        The link target is _not_ required to exist inside the package.
                        * The `debputy` tool will normalize the target according to the rules
                          of the Debian Policy. Use absolute or relative target at your own
                          preference.
                        """
                    ),
                ),
                documented_attr(
                    "replacement_rule",
                    textwrap.dedent(
                        """\
                        This attribute defines how to handle if `path` already exists. It can
                        be set to one of the following values:
                        - `error-if-exists`: When `path` already exists, `debputy` will
                          stop with an error. This is similar to `ln -s` semantics.
                        - `error-if-directory`: When `path` already exists, **and** it is
                          a directory, `debputy` will stop with an error. Otherwise,
                          remove the `path` first and then create the symlink. This is
                          similar to `ln -sf` semantics.
                        - `abort-on-non-empty-directory` (default): When `path` already
                          exists, then it will be removed provided it is a non-directory
                          **or** an *empty* directory and the symlink will then be
                          created. If the path is a *non-empty* directory, `debputy`
                          will stop with an error.
                        - `discard-existing`: When `path` already exists, it will be
                          removed. If the `path` is a directory, all its contents will
                          be removed recursively along with the directory. Finally,
                          the symlink is created. This is similar to having an explicit
                          `remove` rule just prior to the `create-symlink` that is
                          conditional on `path` existing (plus the condition defined in
                          `when` if any).

                        Keep in mind, that `replacement-rule` only applies if `path` exists.
                        If the symlink cannot be created, because a part of `path` exist and
                        is *not* a directory, then `create-symlink` will fail regardless of
                        the value in `replacement-rule`.
                        """
                    ),
                ),
                documented_attr(
                    "when",
                    textwrap.dedent(
                        """\
                        A condition as defined in [Conditional rules]({MANIFEST_FORMAT_DOC}#Conditional rules).
                        """
                    ),
                ),
            ],
            reference_documentation_url=_manifest_format_doc(
                "create-symlinks-transformation-rule-create-symlink"
            ),
        ),
    )
    api.plugable_manifest_rule(
        TransformationRule,
        "path-metadata",
        PathManifestRule,
        _transformation_path_metadata,
        source_format=PathManifestSourceDictFormat,
        inline_reference_documentation=reference_documentation(
            title="Change path owner/group or mode (`path-metadata`)",
            description=textwrap.dedent(
                """\
                The `debputy` command normalizes the path metadata (such as ownership and mode) similar
                to `dh_fixperms`. For most packages, the default is what you want. However, in some
                cases, the package has a special case or two that `debputy` does not cover. In that
                case, you can tell `debputy` to use the metadata you want by using the `path-metadata`
                transformation.

                Common use-cases include setuid/setgid binaries (such as `usr/bin/sudo`) or/and static
                ownership (such as /usr/bin/write).
                """
            ),
            attributes=[
                documented_attr(
                    ["path", "paths"],
                    textwrap.dedent(
                        """\
                        A path match (`path`) or a list of path matches (`paths`) defining the path(s)
                        inside the package that should be affected. The path match(es) can use globs
                        and substitution variables. Special-rules for matches:
                        - Symlinks are never followed and will never be matched by this rule.
                        - Directory handling depends on the `recursive` attribute.
                        """
                    ),
                ),
                documented_attr(
                    "owner",
                    textwrap.dedent(
                        """\
                        Denotes the owner of the paths matched by `path` or `paths`. When omitted,
                        no change of owner is done.
                        """
                    ),
                ),
                documented_attr(
                    "group",
                    textwrap.dedent(
                        """\
                        Denotes the group of the paths matched by `path` or `paths`. When omitted,
                        no change of group is done.
                        """
                    ),
                ),
                documented_attr(
                    "mode",
                    textwrap.dedent(
                        """\
                        Denotes the mode of the paths matched by `path` or `paths`. When omitted,
                        no change in mode is done. Note that numeric mode must always be given as
                        a string (i.e., with quotes). Symbolic mode can be used as well. If
                        symbolic mode uses a relative definition (e.g., `o-rx`), then it is
                        relative to the matched path's current mode.
                        """
                    ),
                ),
                documented_attr(
                    "capabilities",
                    textwrap.dedent(
                        """\
                        Denotes a Linux capability that should be applied to the path. When provided,
                        `debputy` will cause the capability to be applied to all *files* denoted by
                        the `path`/`paths` attribute on install (via `postinst configure`) provided
                        that `setcap` is installed on the system when the `postinst configure` is
                        run.
                        - If any non-file paths are matched, the `capabilities` will *not* be applied
                          to those paths.

                        """
                    ),
                ),
                documented_attr(
                    "capability_mode",
                    textwrap.dedent(
                        """\
                        Denotes the mode to apply to the path *if* the Linux capability denoted in
                        `capabilities` was successfully applied. If omitted, it defaults to `a-s` as
                        generally capabilities are used to avoid "setuid"/"setgid" binaries. The
                        `capability-mode` is relative to the *final* path mode (the mode of the path
                        in the produced `.deb`). The `capability-mode` attribute cannot be used if
                        `capabilities` is omitted.
                        """
                    ),
                ),
                documented_attr(
                    "recursive",
                    textwrap.dedent(
                        """\
                        When a directory is matched, then the metadata changes are applied to the
                        directory itself. When `recursive` is `true`, then the transformation is
                        *also* applied to all paths beneath the directory. The default value for
                        this attribute is `false`.
                        """
                    ),
                ),
                documented_attr(
                    "when",
                    textwrap.dedent(
                        """\
                        A condition as defined in [Conditional rules]({MANIFEST_FORMAT_DOC}#Conditional rules).
                        """
                    ),
                ),
            ],
            reference_documentation_url=_manifest_format_doc(
                "change-path-ownergroup-or-mode-path-metadata"
            ),
        ),
    )
    api.plugable_manifest_rule(
        TransformationRule,
        "create-directories",
        EnsureDirectoryRule,
        _transformation_mkdirs,
        source_format=_with_alt_form(EnsureDirectorySourceFormat),
        inline_reference_documentation=reference_documentation(
            title="Create directories transformation rule (`create-directories`)",
            description=textwrap.dedent(
                """\
                NOTE: This transformation is only really needed if you need to create an empty
                directory somewhere in your package as an integration point. All `debputy`
                transformations will create directories as required.

                In most cases, upstream build systems and `debputy` will create all the relevant
                directories. However, in some rare cases you may want to explicitly define a path
                to be a directory. Maybe to silence a linter that is warning you about a directory
                being empty, or maybe you need an empty directory that nothing else is creating for
                you. This can be done via the `create-directories` transformation rule.

                Unless you have a specific need for the mapping form, you are recommended to use the
                shorthand form of just listing the directories you want created.
                """
            ),
            non_mapping_description=textwrap.dedent(
                """\
                When the input is a string or a list of string, then that value is used as shorthand
                for `path` or `paths` (respectively).
                """
            ),
            attributes=[
                documented_attr(
                    ["path", "paths"],
                    textwrap.dedent(
                        """\
                        A path (`path`) or a list of paths (`paths`) defining the path(s) inside the
                        package that should be created as directories. The path(s) _cannot_ use globs
                        but can use substitution variables. Parent directories are implicitly created
                        (with owner `root:root` and mode `0755` - only explicitly listed directories
                        are affected by the owner/mode options).
                        """
                    ),
                ),
                documented_attr(
                    "owner",
                    textwrap.dedent(
                        """\
                        Denotes the owner of the directory (but _not_ what is inside the directory).
                        Default is "root".
                        """
                    ),
                ),
                documented_attr(
                    "group",
                    textwrap.dedent(
                        """\
                        Denotes the group of the directory (but _not_ what is inside the directory).
                        Default is "root".
                        """
                    ),
                ),
                documented_attr(
                    "mode",
                    textwrap.dedent(
                        """\
                        Denotes the mode of the directory (but _not_ what is inside the directory).
                        Note that numeric mode must always be given as a string (i.e., with quotes).
                        Symbolic mode can be used as well. If symbolic mode uses a relative
                        definition (e.g., `o-rx`), then it is relative to the directory's current mode
                        (if it already exists) or `0755` if the directory is created by this
                        transformation. The default is "0755".
                        """
                    ),
                ),
                documented_attr(
                    "when",
                    textwrap.dedent(
                        """\
                        A condition as defined in [Conditional rules]({MANIFEST_FORMAT_DOC}#Conditional rules).
                        """
                    ),
                ),
            ],
            reference_documentation_url=_manifest_format_doc(
                "create-directories-transformation-rule-directories"
            ),
        ),
    )
+ """ + ), + ), + ], + reference_documentation_url=_manifest_format_doc( + "create-directories-transformation-rule-directories" + ), + ), + ) + + +def register_manifest_condition_rules(api: DebputyPluginInitializerProvider) -> None: + api.provide_manifest_keyword( + ManifestCondition, + "cross-compiling", + lambda *_: ManifestCondition.is_cross_building(), + inline_reference_documentation=reference_documentation( + title="Cross-Compiling condition `cross-compiling`", + description=textwrap.dedent( + """\ + The `cross-compiling` condition is used to determine if the current build is + performing a cross build (i.e., `DEB_BUILD_GNU_TYPE` != `DEB_HOST_GNU_TYPE`). + Often this has consequences for what is possible to do. + + Note if you specifically want to know: + + * whether build-time tests should be run, then please use the + `run-build-time-tests` condition. + * whether compiled binaries can be run as if it was a native binary, please + use the `can-execute-compiled-binaries` condition instead. That condition + accounts for cross-building in its evaluation. + """ + ), + reference_documentation_url=_manifest_format_doc( + "cross-compiling-condition-cross-compiling-string" + ), + ), + ) + api.provide_manifest_keyword( + ManifestCondition, + "can-execute-compiled-binaries", + lambda *_: ManifestCondition.can_execute_compiled_binaries(), + inline_reference_documentation=reference_documentation( + title="Can run produced binaries `can-execute-compiled-binaries`", + description=textwrap.dedent( + """\ + The `can-execute-compiled-binaries` condition is used to assert the build + can assume that all compiled binaries can be run as-if they were native + binaries. For native builds, this condition always evaluates to `true`. + For cross builds, the condition is generally evaluates to `false`. However, + there are special-cases where binaries can be run during cross-building. + Accordingly, this condition is subtly different from the `cross-compiling` + condition. 
+ + Note this condition should *not* be used when you know the binary has been + built for the build architecture (`DEB_BUILD_ARCH`) or for determining + whether build-time tests should be run (for build-time tests, please use + the `run-build-time-tests` condition instead). Some upstream build systems + are advanced enough to distinguish building a final product vs. building + a helper tool that needs to run during build. The latter will often be + compiled by a separate compiler (often using `$(CC_FOR_BUILD)`, + `cc_for_build` or similar variable names in upstream build systems for + that compiler). + """ + ), + reference_documentation_url=_manifest_format_doc( + "can-run-produced-binaries-can-execute-compiled-binaries-string" + ), + ), + ) + api.provide_manifest_keyword( + ManifestCondition, + "run-build-time-tests", + lambda *_: ManifestCondition.run_build_time_tests(), + inline_reference_documentation=reference_documentation( + title="Whether build time tests should be run `run-build-time-tests`", + description=textwrap.dedent( + """\ + The `run-build-time-tests` condition is used to determine whether (build + time) tests should be run for this build. This condition roughly + translates into whether `nocheck` is present in `DEB_BUILD_OPTIONS`. + + In general, the manifest *should not* prevent build time tests from being + run during cross-builds. + """ + ), + reference_documentation_url=_manifest_format_doc( + "whether-build-time-tests-should-be-run-run-build-time-tests-string" + ), + ), + ) + + api.plugable_manifest_rule( + ManifestCondition, + "not", + MCNot, + _mc_not, + inline_reference_documentation=reference_documentation( + title="Negated condition `not` (mapping)", + description=textwrap.dedent( + """\ + It is possible to negate a condition via the `not` condition. + + As an example: + + packages: + util-linux: + transformations: + - create-symlink + path: sbin/getty + target: /sbin/agetty + when: + # On Hurd, the package "hurd" ships "sbin/getty". 
+ # This example happens to also be alternative to `arch-marches: '!hurd-any` + not: + arch-matches: 'hurd-any' + + The `not` condition is specified as a mapping, where the key is `not` and the + value is a nested condition. + """ + ), + attributes=[ + documented_attr( + "negated_condition", + textwrap.dedent( + """\ + The condition to be negated. + """ + ), + ), + ], + reference_documentation_url=_manifest_format_doc( + "whether-build-time-tests-should-be-run-run-build-time-tests-string" + ), + ), + ) + api.plugable_manifest_rule( + ManifestCondition, + ["any-of", "all-of"], + MCAnyOfAllOf, + _mc_any_of, + source_format=List[ManifestCondition], + inline_reference_documentation=reference_documentation( + title="All or any of a list of conditions `all-of`/`any-of`", + description=textwrap.dedent( + """\ + It is possible to aggregate conditions using the `all-of` or `any-of` + condition. This provide `X and Y` and `X or Y` semantics (respectively). + """ + ), + reference_documentation_url=_manifest_format_doc( + "all-or-any-of-a-list-of-conditions-all-ofany-of-list" + ), + ), + ) + api.plugable_manifest_rule( + ManifestCondition, + "arch-matches", + MCArchMatches, + _mc_arch_matches, + source_format=str, + inline_reference_documentation=reference_documentation( + title="Architecture match condition `arch-matches`", + description=textwrap.dedent( + """\ + Sometimes, a rule needs to be conditional on the architecture. + This can be done by using the `arch-matches` rule. In 99.99% + of the cases, `arch-matches` will be form you are looking for + and practically behaves like a comparison against + `dpkg-architecture -qDEB_HOST_ARCH`. + + For the cross-compiling specialists or curious people: The + `arch-matches` rule behaves like a `package-context-arch-matches` + in the context of a binary package and like + `source-context-arch-matches` otherwise. The details of those + are covered in their own keywords. 
+ """ + ), + non_mapping_description=textwrap.dedent( + """\ + The value must be a string in the form of a space separated list + architecture names or architecture wildcards (same syntax as the + architecture restriction in Build-Depends in debian/control except + there is no enclosing `[]` brackets). The names/wildcards can + optionally be prefixed by `!` to negate them. However, either + *all* names / wildcards must have negation or *none* of them may + have it. + """ + ), + reference_documentation_url=_manifest_format_doc( + "architecture-match-condition-arch-matches-mapping" + ), + ), + ) + + context_arch_doc = reference_documentation( + title="Explicit source or binary package context architecture match condition" + " `source-context-arch-matches`, `package-context-arch-matches` (mapping)", + description=textwrap.dedent( + """\ + **These are special-case conditions**. Unless you know that you have a very special-case, + you should probably use `arch-matches` instead. These conditions are aimed at people with + corner-case special architecture needs. It also assumes the reader is familiar with the + `arch-matches` condition. + + To understand these rules, here is a quick primer on `debputy`'s concept of "source context" + vs "(binary) package context" architecture. For a native build, these two contexts are the + same except that in the package context an `Architecture: all` package always resolve to + `all` rather than `DEB_HOST_ARCH`. As a consequence, `debputy` forbids `arch-matches` and + `package-context-arch-matches` in the context of an `Architecture: all` package as a warning + to the packager that condition does not make sense. + + In the very rare case that you need an architecture condition for an `Architecture: all` package, + you can use `source-context-arch-matches`. However, this means your `Architecture: all` package + is not reproducible between different build hosts (which has known to be relevant for some + very special cases). 
+ + Additionally, for the 0.0001% case you are building a cross-compiling compiler (that is, + `DEB_HOST_ARCH != DEB_TARGET_ARCH` and you are working with `gcc` or similar) `debputy` can be + instructed (opt-in) to use `DEB_TARGET_ARCH` rather than `DEB_HOST_ARCH` for certain packages when + evaluating an architecture condition in context of a binary package. This can be useful if the + compiler produces supporting libraries that need to be built for the `DEB_TARGET_ARCH` rather than + the `DEB_HOST_ARCH`. This is where `arch-matches` or `package-context-arch-matches` can differ + subtly from `source-context-arch-matches` in how they evaluate the condition. This opt-in currently + relies on setting `X-DH-Build-For-Type: target` for each of the relevant packages in + `debian/control`. However, unless you are a cross-compiling specialist, you will probably never + need to care about nor use any of this. + + Accordingly, the possible conditions are: + + * `arch-matches`: This is the form recommended to laymen and as the default use-case. This + conditional acts `package-context-arch-matches` if the condition is used in the context + of a binary package. Otherwise, it acts as `source-context-arch-matches`. + + * `source-context-arch-matches`: With this conditional, the provided architecture constraint is compared + against the build time provided host architecture (`dpkg-architecture -qDEB_HOST_ARCH`). This can + be useful when an `Architecture: all` package needs an architecture condition for some reason. + + * `package-context-arch-matches`: With this conditional, the provided architecture constraint is compared + against the package's resolved architecture. This condition can only be used in the context of a binary + package (usually, under `packages.<name>.`). If the package is an `Architecture: all` package, the + condition will fail with an error as the condition always have the same outcome. 
For all other + packages, the package's resolved architecture is the same as the build time provided host architecture + (`dpkg-architecture -qDEB_HOST_ARCH`). + + - However, as noted above there is a special case for when compiling a cross-compiling compiler, where + this behaves subtly different from `source-context-arch-matches`. + + All conditions are used the same way as `arch-matches`. Simply replace `arch-matches` with the other + condition. See the `arch-matches` description for an example. + """ + ), + non_mapping_description=textwrap.dedent( + """\ + The value must be a string in the form of a space separated list + architecture names or architecture wildcards (same syntax as the + architecture restriction in Build-Depends in debian/control except + there is no enclosing `[]` brackets). The names/wildcards can + optionally be prefixed by `!` to negate them. However, either + *all* names / wildcards must have negation or *none* of them may + have it. + """ + ), + ) + + api.plugable_manifest_rule( + ManifestCondition, + "source-context-arch-matches", + MCArchMatches, + _mc_source_context_arch_matches, + source_format=str, + inline_reference_documentation=context_arch_doc, + ) + api.plugable_manifest_rule( + ManifestCondition, + "package-context-arch-matches", + MCArchMatches, + _mc_arch_matches, + source_format=str, + inline_reference_documentation=context_arch_doc, + ) + api.plugable_manifest_rule( + ManifestCondition, + "build-profiles-matches", + MCBuildProfileMatches, + _mc_build_profile_matches, + source_format=str, + inline_reference_documentation=reference_documentation( + title="Active build profile match condition `build-profiles-matches`", + description=textwrap.dedent( + """\ + The `build-profiles-matches` condition is used to assert whether the + active build profiles (`DEB_BUILD_PROFILES` / `dpkg-buildpackage -P`) + matches a given build profile restriction. 
def register_dpkg_conffile_rules(api: DebputyPluginInitializerProvider) -> None:
    """Register the conffile management manifest rules (`remove` and `rename`)

    Both rules are registered for the `DpkgMaintscriptHelperCommand` rule type and
    currently have no inline reference documentation.

    :param api: The plugin initializer that the rules are registered with.
    """
    conffile_rules = (
        ("remove", DpkgRemoveConffileRule, _dpkg_conffile_remove),
        ("rename", DpkgRenameConffileRule, _dpkg_conffile_rename),
    )
    for manifest_keyword, parsed_format, rule_handler in conffile_rules:
        api.plugable_manifest_rule(
            DpkgMaintscriptHelperCommand,
            manifest_keyword,
            parsed_format,
            rule_handler,
            inline_reference_documentation=None,  # TODO: write and add
        )
DebputyParseHint.not_path_error_hint()] + replacement_rule: NotRequired[CreateSymlinkReplacementRule] + + +class TransformationMoveRuleSpec(DebputyParsedContentStandardConditional): + source: FileSystemMatchRule + target: FileSystemExactMatchRule + + +class TransformationRemoveRuleSpec(DebputyParsedContentStandardConditional): + paths: List[FileSystemMatchRule] + keep_empty_parent_dirs: NotRequired[bool] + + +class TransformationRemoveRuleInputFormat(DebputyParsedContentStandardConditional): + path: NotRequired[ + Annotated[FileSystemMatchRule, DebputyParseHint.target_attribute("paths")] + ] + paths: NotRequired[List[FileSystemMatchRule]] + keep_empty_parent_dirs: NotRequired[bool] + + +class ParsedInstallRuleSourceFormat(DebputyParsedContentStandardConditional): + sources: NotRequired[List[FileSystemMatchRule]] + source: NotRequired[ + Annotated[FileSystemMatchRule, DebputyParseHint.target_attribute("sources")] + ] + into: NotRequired[ + Annotated[ + Union[str, List[str]], + DebputyParseHint.required_when_multi_binary(), + ] + ] + dest_dir: NotRequired[ + Annotated[FileSystemExactMatchRule, DebputyParseHint.not_path_error_hint()] + ] + install_as: NotRequired[ + Annotated[ + FileSystemExactMatchRule, + DebputyParseHint.conflicts_with_source_attributes("sources", "dest_dir"), + DebputyParseHint.manifest_attribute("as"), + DebputyParseHint.not_path_error_hint(), + ] + ] + + +class ParsedInstallDocRuleSourceFormat(DebputyParsedContentStandardConditional): + sources: NotRequired[List[FileSystemMatchRule]] + source: NotRequired[ + Annotated[FileSystemMatchRule, DebputyParseHint.target_attribute("sources")] + ] + into: NotRequired[ + Annotated[ + Union[str, List[str]], + DebputyParseHint.required_when_multi_binary(package_type="deb"), + ] + ] + dest_dir: NotRequired[ + Annotated[FileSystemExactMatchRule, DebputyParseHint.not_path_error_hint()] + ] + install_as: NotRequired[ + Annotated[ + FileSystemExactMatchRule, + 
DebputyParseHint.conflicts_with_source_attributes("sources", "dest_dir"), + DebputyParseHint.manifest_attribute("as"), + DebputyParseHint.not_path_error_hint(), + ] + ] + + +class ParsedInstallRule(DebputyParsedContentStandardConditional): + sources: List[FileSystemMatchRule] + into: NotRequired[List[BinaryPackage]] + dest_dir: NotRequired[FileSystemExactMatchRule] + install_as: NotRequired[FileSystemExactMatchRule] + + +class ParsedMultiDestInstallRuleSourceFormat(DebputyParsedContentStandardConditional): + sources: NotRequired[List[FileSystemMatchRule]] + source: NotRequired[ + Annotated[FileSystemMatchRule, DebputyParseHint.target_attribute("sources")] + ] + into: NotRequired[ + Annotated[ + Union[str, List[str]], + DebputyParseHint.required_when_multi_binary(), + ] + ] + dest_dirs: NotRequired[ + Annotated[ + List[FileSystemExactMatchRule], DebputyParseHint.not_path_error_hint() + ] + ] + install_as: NotRequired[ + Annotated[ + List[FileSystemExactMatchRule], + DebputyParseHint.conflicts_with_source_attributes("sources", "dest_dirs"), + DebputyParseHint.not_path_error_hint(), + DebputyParseHint.manifest_attribute("as"), + ] + ] + + +class ParsedMultiDestInstallRule(DebputyParsedContentStandardConditional): + sources: List[FileSystemMatchRule] + into: NotRequired[List[BinaryPackage]] + dest_dirs: NotRequired[List[FileSystemExactMatchRule]] + install_as: NotRequired[List[FileSystemExactMatchRule]] + + +class ParsedInstallExamplesRule(DebputyParsedContentStandardConditional): + sources: List[FileSystemMatchRule] + into: NotRequired[List[BinaryPackage]] + + +class ParsedInstallExamplesRuleSourceFormat(DebputyParsedContentStandardConditional): + sources: NotRequired[List[FileSystemMatchRule]] + source: NotRequired[ + Annotated[FileSystemMatchRule, DebputyParseHint.target_attribute("sources")] + ] + into: NotRequired[ + Annotated[ + Union[str, List[str]], + DebputyParseHint.required_when_multi_binary(package_type="deb"), + ] + ] + + +class 
def _parse_filename(
    filename: str,
    attribute_path: AttributePath,
    *,
    allow_directories: bool = True,
) -> str:
    """Normalize a path from the manifest and reject values that cannot be used

    :param filename: The path exactly as it was written in the manifest.
    :param attribute_path: Where the value was defined (only used for error messages).
    :param allow_directories: If `False`, a value with a trailing "/" is rejected.
    :return: The result of `_normalize_path(filename, with_prefix=False)`.
    :raises ManifestParseException: If normalization fails, if the value ends with "/"
      while directories are not allowed, or if the value normalizes to ".".
    """
    try:
        cleaned = _normalize_path(filename, with_prefix=False)
    except ValueError as err:
        # The ValueError only carries the reason; the manifest location is more
        # useful to the user than the original traceback.
        raise ManifestParseException(
            f'Error parsing the path "{filename}" defined in {attribute_path.path}: {err.args[0]}'
        ) from None

    has_trailing_slash = filename.endswith("/")
    if has_trailing_slash and not allow_directories:
        raise ManifestParseException(
            f'The path "{filename}" in {attribute_path.path} ends with "/" implying it is a directory,'
            f" but this feature can only be used for files"
        )

    if cleaned != ".":
        return cleaned
    raise ManifestParseException(
        f'The path "{filename}" in {attribute_path.path} looks like the root directory,'
        f" but this feature does not allow the root directory here."
    )
def _dpkg_conffile_remove(
    _name: str,
    parsed_data: DpkgRemoveConffileRule,
    path: AttributePath,
    _context: ParserContextData,
) -> DpkgMaintscriptHelperCommand:
    """Turn a parsed `remove` conffile rule into an `rm_conffile` command

    :param _name: The manifest keyword (unused).
    :param parsed_data: The parsed rule; its `path` key names the conffile.
    :param path: Manifest location of the rule; its `path_hint` is set to the
      conffile as written in the manifest.
    :param _context: Parser context (unused).
    :return: The command created by `DpkgMaintscriptHelperCommand.rm_conffile`.
    :raises ManifestParseException: If the path, version or owning package is invalid.
    """
    conffile = parsed_data["path"]
    # The conffile must be a file, and it is passed on as an absolute path.
    absolute_conffile = "/" + _parse_filename(
        conffile,
        path["path"],
        allow_directories=False,
    )
    path.path_hint = conffile

    prior_version, owner = _parse_conffile_prior_version_and_owning_package(
        parsed_data,
        path,
    )
    return DpkgMaintscriptHelperCommand.rm_conffile(
        path,
        absolute_conffile,
        prior_version,
        owner,
    )
def _install_rule_handler(
    _name: str,
    parsed_data: ParsedInstallRule,
    path: AttributePath,
    context: ParserContextData,
) -> InstallRule:
    """Convert a parsed `install` rule into an `InstallRule`

    :param _name: The manifest keyword (unused).
    :param parsed_data: The parsed rule (`sources` plus the optional `into`,
      `dest_dir`, `install_as` and `when` keys).
    :param path: Manifest location of the rule.
    :param context: Used to resolve the package when `into` is absent or empty.
    :return: An "install as" rule when `install_as` is present; otherwise an
      "install into destination directory" rule.
    """
    rule_sources = parsed_data["sources"]
    as_name = parsed_data.get("install_as")
    target_packages = parsed_data.get("into")
    target_dir = parsed_data.get("dest_dir")
    when = parsed_data.get("when")

    if not target_packages:
        target_packages = [
            context.single_binary_package(path, package_attribute="into")
        ]
    package_set = frozenset(target_packages)

    if as_name is None:
        return InstallRule.install_dest(
            rule_sources,
            None if target_dir is None else target_dir.match_rule.path,
            package_set,
            path.path,
            when,
        )

    # `install_as` is declared as conflicting with `sources` and `dest_dir`.
    assert len(rule_sources) == 1
    assert target_dir is None
    return InstallRule.install_as(
        rule_sources[0],
        as_name.match_rule.path,
        package_set,
        path.path,
        when,
    )
def _install_man_rule_handler(
    _name: str,
    parsed_data: ParsedInstallManpageRule,
    attribute_path: AttributePath,
    context: ParserContextData,
) -> InstallRule:
    """Validate a parsed `install-man` rule and convert it into an `InstallRule`

    :param _name: The manifest keyword (unused).
    :param parsed_data: The parsed rule (`sources` plus the optional `language`,
      `section`, `into` and `when` keys).
    :param attribute_path: Manifest location of the rule (used in error messages).
    :param context: Used to resolve the package when `into` is absent or empty.
    :return: The rule created by `InstallRule.install_man`.
    :raises ManifestParseException: If `language` is not an accepted value, if
      `section` is outside 1-9, if a source ends with ".gz" without an explicit
      `section`, or if a source ends with "/".
    """
    sources = parsed_data["sources"]
    language = parsed_data.get("language")
    section = parsed_data.get("section")

    if language is not None:
        is_lang_ok = language in (
            "C",
            "derive-from-basename",
            "derive-from-path",
        )

        # Plain 2-letter language code such as "de"
        if not is_lang_ok and len(language) == 2 and language.islower():
            is_lang_ok = True

        # Language + dialect such as "pt_BR"
        if (
            not is_lang_ok
            and len(language) == 5
            and language[2] == "_"
            and language[:2].islower()
            and language[3:].isupper()
        ):
            is_lang_ok = True

        if not is_lang_ok:
            raise ManifestParseException(
                f'The language attribute must in a 2-letter language code ("de"), a 5-letter language + dialect'
                f' code ("pt_BR"), "derive-from-basename", "derive-from-path", or omitted. The problematic'
                f' definition is {attribute_path["language"].path}'
            )

    # The upper bound is 9 so that the check agrees with the documented range
    # in the message below (it previously let 10 through).
    if section is not None and not 1 <= section <= 9:
        raise ManifestParseException(
            f"The section attribute must in the range [1-9] or omitted. The problematic definition is"
            f' {attribute_path["section"].path}'
        )
    if section is None and any(s.raw_match_rule.endswith(".gz") for s in sources):
        raise ManifestParseException(
            "Sorry, compressed manpages are not supported without an explicit `section` definition at the moment."
            " This limitation may be removed in the future. Problematic definition from"
            f' {attribute_path["sources"].path}'
        )
    if any(s.raw_match_rule.endswith("/") for s in sources):
        raise ManifestParseException(
            'The install-man rule can only match non-directories. Therefore, none of the sources can end with "/".'
            " as that implies the source is for a directory. Problematic definition from"
            f' {attribute_path["sources"].path}'
        )

    into = parsed_data.get("into")
    if not into:
        into = [
            context.single_binary_package(
                attribute_path, package_type="deb", package_attribute="into"
            )
        ]
    condition = parsed_data.get("when")
    return InstallRule.install_man(
        sources,
        frozenset(into),
        section,
        language,
        attribute_path.path,
        condition,
    )
def _transformation_create_symlink(
    _name: str,
    parsed_data: CreateSymlinkRule,
    attribute_path: AttributePath,
    _context: ParserContextData,
) -> TransformationRule:
    """Convert a parsed `create-symlink` rule into a transformation rule

    :param _name: The manifest keyword (unused).
    :param parsed_data: The parsed rule (`path` and `target` plus the optional
      `replacement_rule` and `when` keys).
    :param attribute_path: Manifest location of the rule.
    :param _context: Parser context (unused).
    :return: A `CreateSymlinkPathTransformationRule` for the normalized target.
    """
    symlink_path = parsed_data["path"].match_rule.path
    on_conflict: CreateSymlinkReplacementRule = parsed_data.get(
        "replacement_rule",
        "abort-on-non-empty-directory",
    )

    try:
        normalized_target = debian_policy_normalize_symlink_target(
            symlink_path,
            parsed_data["target"].symlink_target,
        )
    except ValueError as err:  # pragma: no cover
        raise AssertionError(
            "Debian Policy normalization should not raise ValueError here"
        ) from err

    return CreateSymlinkPathTransformationRule(
        normalized_target,
        symlink_path,
        on_conflict,
        attribute_path,
        parsed_data.get("when"),
    )
+ ) + condition = parsed_data.get("when") + + return PathMetadataTransformationRule( + [m.match_rule for m in match_rules], + owner, + group, + mode, + recursive, + capabilities, + capability_mode, + attribute_path.path, + condition, + ) + + +def _transformation_mkdirs( + _name: str, + parsed_data: EnsureDirectoryRule, + attribute_path: AttributePath, + _context: ParserContextData, +) -> TransformationRule: + provided_paths = parsed_data["paths"] + owner = parsed_data.get("owner") + group = parsed_data.get("group") + mode = parsed_data.get("mode") + + condition = parsed_data.get("when") + + return CreateDirectoryTransformationRule( + [p.match_rule.path for p in provided_paths], + owner, + group, + mode, + attribute_path.path, + condition, + ) + + +def _at_least_two( + content: List[Any], + attribute_path: AttributePath, + attribute_name: str, +) -> None: + if len(content) < 2: + raise ManifestParseException( + f"Must have at least two conditions in {attribute_path[attribute_name].path}" + ) + + +def _mc_any_of( + name: str, + parsed_data: MCAnyOfAllOf, + attribute_path: AttributePath, + _context: ParserContextData, +) -> ManifestCondition: + conditions = parsed_data["conditions"] + _at_least_two(conditions, attribute_path, "conditions") + if name == "any-of": + return ManifestCondition.any_of(conditions) + assert name == "all-of" + return ManifestCondition.all_of(conditions) + + +def _mc_not( + _name: str, + parsed_data: MCNot, + _attribute_path: AttributePath, + _context: ParserContextData, +) -> ManifestCondition: + condition = parsed_data["negated_condition"] + return condition.negated() + + +def _extract_arch_matches( + parsed_data: MCArchMatches, + attribute_path: AttributePath, +) -> List[str]: + arch_matches_as_str = parsed_data["arch_matches"] + # Can we check arch list for typos? If we do, it must be tight in how close matches it does. + # Consider "arm" vs. "armel" (edit distance 2, but both are valid). 
def _mc_source_context_arch_matches(
    _name: str,
    parsed_data: MCArchMatches,
    attribute_path: AttributePath,
    _context: ParserContextData,
) -> ManifestCondition:
    """Build the source-context architecture condition from a parsed rule

    :param _name: The manifest keyword (unused).
    :param parsed_data: The parsed condition holding the `arch_matches` string.
    :param attribute_path: Manifest location of the condition.
    :param _context: Parser context (unused).
    :return: A `SourceContextArchMatchManifestCondition` for the listed architectures.
    :raises ManifestParseException: Propagated from `_extract_arch_matches`.
    """
    return SourceContextArchMatchManifestCondition(
        _extract_arch_matches(parsed_data, attribute_path)
    )
def _mc_build_profile_matches(
    _name: str,
    parsed_data: MCBuildProfileMatches,
    attribute_path: AttributePath,
    _context: ParserContextData,
) -> ManifestCondition:
    """Validate a `build-profiles-matches` condition and convert it

    :param _name: The manifest keyword (unused).
    :param parsed_data: The parsed condition holding the `build_profile_matches` string.
    :param attribute_path: Manifest location of the condition (used in error messages).
    :param _context: Parser context (unused).
    :return: A `BuildProfileMatch` for the (stripped) restriction.
    :raises ManifestParseException: If the restriction is empty or is rejected by
      `active_profiles_match`.
    """
    build_profile_spec = parsed_data["build_profile_matches"].strip()
    attr_path = attribute_path["build_profile_matches"]
    if not build_profile_spec:
        raise ManifestParseException(
            f"The condition at {attr_path.path} must not be empty"
        )
    try:
        # Evaluated against an empty profile set purely to validate the syntax;
        # the result is intentionally discarded.
        active_profiles_match(build_profile_spec, frozenset())
    except ValueError as e:
        # Guard against a ValueError raised without arguments.
        detail = e.args[0] if e.args else e
        raise ManifestParseException(
            f"Could not parse the build specification at {attr_path.path}: {detail}"
        ) from e
    return BuildProfileMatch(build_profile_spec)
def generate_snippets_for_systemd_units(
    services: Sequence[ServiceDefinition[SystemdServiceContext]],
    ctrl: BinaryCtrlAccessor,
    _context: PackageProcessingContext,
) -> None:
    """Generate the maintscript snippets for the systemd units of a package

    :param services: The service definitions to integrate (must not be empty).
    :param ctrl: Accessor used to register the maintscript snippets.
    :param _context: The package processing context (unused).
    """
    # NOTE(review): nothing in this function adds to `stop_in_prerm`, so the
    # prerm "stop" snippet below is never emitted. Confirm whether that is
    # intentional.
    stop_in_prerm: List[str] = []
    stop_then_start_scripts = []
    on_purge = []
    start_on_install = []
    action_on_upgrade = collections.defaultdict(list)
    assert services

    for service_def in services:
        if service_def.auto_enable_on_install:
            # Enabling variant: `was-enabled` defaults to true, so the unit is
            # enabled on a fresh installation.
            template = """\
                # The following line should be removed in trixie or trixie+1
                deb-systemd-helper unmask {UNITFILE} >/dev/null || true

                # was-enabled defaults to true, so new installations run enable.
                if deb-systemd-helper --quiet was-enabled {UNITFILE}; then
                    # Enables the unit on first installation, creates new
                    # symlinks on upgrades if the unit file has changed.
                    deb-systemd-helper enable {UNITFILE} >/dev/null || true
                else
                    # Update the statefile to add new symlinks (if any), which need to be
                    # cleaned up on purge. Also remove old symlinks.
                    deb-systemd-helper update-state {UNITFILE} >/dev/null || true
                fi
            """
        else:
            # Non-enabling variant: only touches units that deb-systemd-helper
            # already tracks, so a fresh installation leaves the unit disabled.
            template = """\
                if deb-systemd-helper debian-installed {UNITFILE}; then
                    # The following line should be removed in trixie or trixie+1
                    deb-systemd-helper unmask {UNITFILE} >/dev/null || true

                    if deb-systemd-helper --quiet was-enabled {UNITFILE}; then
                        # Create new symlinks, if any.
                        deb-systemd-helper enable {UNITFILE} >/dev/null || true
                    fi
                fi

                # Update the statefile to add new symlinks (if any), which need to be cleaned
                # up on purge. Also remove old symlinks.
                deb-systemd-helper update-state {UNITFILE} >/dev/null || true
            """
        service_name = service_def.name

        if assume_not_none(service_def.service_context).had_install_section:
            ctrl.maintscript.on_configure(
                template.format(
                    UNITFILE=ctrl.maintscript.escape_shell_words(service_name),
                )
            )
            on_purge.append(service_name)
        elif service_def.auto_enable_on_install:
            _error(
                f'The service "{service_name}" cannot be enabled under "systemd" as'
                f' it has no "[Install]" section. Please correct {service_def.definition_source}'
                f' so that it does not enable the service or does not apply to "systemd"'
            )

        if service_def.auto_start_in_install:
            start_on_install.append(service_name)
        if service_def.on_upgrade == "stop-then-start":
            stop_then_start_scripts.append(service_name)
        elif service_def.on_upgrade in ("restart", "reload"):
            action: str = service_def.on_upgrade
            # A unit that is not started on install may not be running, so only
            # restart it if it is ("try-restart"); "reload" is used as-is.
            if not service_def.auto_start_in_install and action != "reload":
                action = f"try-{action}"
            action_on_upgrade[action].append(service_name)
        elif service_def.on_upgrade != "do-nothing":
            raise AssertionError(
                f"Missing support for on_upgrade rule: {service_def.on_upgrade}"
            )

    # `stop_then_start_scripts` is part of the condition: those units are stopped
    # in preinst (below) and would otherwise never be started again when no other
    # unit requires a postinst action.
    if start_on_install or action_on_upgrade or stop_then_start_scripts:
        lines = [
            f"if {EMPTY_DPKG_ROOT_CONDITION} && {SERVICE_MANAGER_IS_SYSTEMD_CONDITION}; then",
            "    systemctl --system daemon-reload >/dev/null || true",
        ]
        if stop_then_start_scripts:
            unit_files = ctrl.maintscript.escape_shell_words(*stop_then_start_scripts)
            lines.append(
                f"    deb-systemd-invoke start {unit_files} >/dev/null || true"
            )
        if start_on_install:
            unit_files = ctrl.maintscript.escape_shell_words(*start_on_install)
            # "$2" is tested for emptiness to select the fresh-installation case.
            lines.append('    if [ -z "$2" ]; then')
            lines.append(
                f"        deb-systemd-invoke start {unit_files} >/dev/null || true"
            )
            lines.append("    fi")
        if action_on_upgrade:
            lines.append('    if [ -n "$2" ]; then')
            for upgrade_action, units in action_on_upgrade.items():
                unit_files = ctrl.maintscript.escape_shell_words(*units)
                lines.append(
                    f"        deb-systemd-invoke {upgrade_action} {unit_files} >/dev/null || true"
                )
            lines.append("    fi")
        lines.append("fi")
        combined = "".join(x if x.endswith("\n") else f"{x}\n" for x in lines)
        ctrl.maintscript.on_configure(combined)

    if stop_then_start_scripts:
        ctrl.maintscript.unconditionally_in_script(
            "preinst",
            textwrap.dedent(
                """\
            if {EMPTY_DPKG_ROOT_CONDITION} && [ "$1" = upgrade ] && {SERVICE_MANAGER_IS_SYSTEMD_CONDITION} ; then
                deb-systemd-invoke stop {UNIT_FILES} >/dev/null || true
            fi
            """.format(
                    EMPTY_DPKG_ROOT_CONDITION=EMPTY_DPKG_ROOT_CONDITION,
                    SERVICE_MANAGER_IS_SYSTEMD_CONDITION=SERVICE_MANAGER_IS_SYSTEMD_CONDITION,
                    UNIT_FILES=ctrl.maintscript.escape_shell_words(
                        *stop_then_start_scripts
                    ),
                )
            ),
        )

    if stop_in_prerm:
        ctrl.maintscript.on_before_removal(
            """\
            if {EMPTY_DPKG_ROOT_CONDITION} && {SERVICE_MANAGER_IS_SYSTEMD_CONDITION} ; then
                deb-systemd-invoke stop {UNIT_FILES} >/dev/null || true
            fi
            """.format(
                EMPTY_DPKG_ROOT_CONDITION=EMPTY_DPKG_ROOT_CONDITION,
                SERVICE_MANAGER_IS_SYSTEMD_CONDITION=SERVICE_MANAGER_IS_SYSTEMD_CONDITION,
                UNIT_FILES=ctrl.maintscript.escape_shell_words(*stop_in_prerm),
            )
        )
    if on_purge:
        ctrl.maintscript.on_purge(
            """\
            if [ -x "/usr/bin/deb-systemd-helper" ]; then
                deb-systemd-helper purge {UNITFILES} >/dev/null || true
            fi
            """.format(
                # Purge the units collected in `on_purge`; this previously used
                # `stop_in_prerm`, which is always empty, so no unit was purged.
                UNITFILES=ctrl.maintscript.escape_shell_words(*on_purge),
            )
        )
    ctrl.maintscript.on_removed(
        textwrap.dedent(
            """\
        if {SERVICE_MANAGER_IS_SYSTEMD_CONDITION} ; then
            systemctl --system daemon-reload >/dev/null || true
        fi
        """.format(
                SERVICE_MANAGER_IS_SYSTEMD_CONDITION=SERVICE_MANAGER_IS_SYSTEMD_CONDITION
            )
        )
    )
expected_units_required_by = collections.defaultdict(list) + + for d in service_dirs: + system_dir = fs_root.lookup(d) + if not system_dir: + continue + for child in system_dir.iterdir: + if child.is_symlink: + dest = os.path.basename(child.readlink()) + aliases[dest].append(child.name) + elif child.is_file and child.name not in seen: + seen.add(child.name) + all_files.append(child) + if "@" in child.name: + # dh_installsystemd does not check the contents of templated services, + # and we match that. + continue + with child.open() as fd: + for line in fd: + line = line.strip() + line_lc = line.lower() + if line_lc == "[install]": + had_install_sections.add(child.name) + elif line_lc.startswith("alias="): + # This code assumes service names cannot contain spaces (as in + # if you copy-paste it for another field it might not work) + aliases[child.name].extend( + _remove_quote(x) for x in line[6:].split() + ) + elif line_lc.startswith("also="): + # This code assumes service names cannot contain spaces (as in + # if you copy-paste it for another field it might not work) + for unit in (_remove_quote(x) for x in line[5:].split()): + expected_units_required_by[unit].append(child.absolute) + expected_units.add(unit) + for path in all_files: + if "@" in path.name: + # Match dh_installsystemd, which skips templated services + continue + names = aliases[path.name] + _, type_of_service = path.name.rsplit(".", 1) + expected_units.difference_update(names) + expected_units.discard(path.name) + names.extend(x[:-8] for x in list(names) if x.endswith(".service")) + names.insert(0, path.name) + if path.name.endswith(".service"): + names.insert(1, path.name[:-8]) + yield SystemdUnit( + path, + names, + type_of_service, + systemd_service_dir, + # Bug (?) compat with dh_installsystemd. All units are started, but only + # enable those with an `[Install]` section. 
+ # Possibly related bug #1055599 + enable_by_default=path.name in had_install_sections, + start_by_default=True, + had_install_section=path.name in had_install_sections, + ) + + if expected_units: + for unit_name in expected_units: + required_by = expected_units_required_by[unit_name] + required_names = ", ".join(required_by) + _error( + f"The unit {unit_name} was required by {required_names} (via Also=...)" + f" but was not present in the package {pkg.name}" + ) + + +def generate_snippets_for_init_scripts( + services: Sequence[ServiceDefinition[None]], + ctrl: BinaryCtrlAccessor, + _context: PackageProcessingContext, +) -> None: + for service_def in services: + script_name = service_def.path.name + script_installed_path = service_def.path.absolute + + update_rcd_params = ( + "defaults" if service_def.auto_enable_on_install else "defaults-disabled" + ) + + ctrl.maintscript.unconditionally_in_script( + "preinst", + textwrap.dedent( + """\ + if [ "$1" = "install" ] && [ -n "$2" ] && [ -x {DPKG_ROOT}{SCRIPT_PATH} ] ; then + chmod +x {DPKG_ROOT}{SCRIPT_PATH} >/dev/null || true + fi + """.format( + DPKG_ROOT=DPKG_ROOT, + SCRIPT_PATH=ctrl.maintscript.escape_shell_words( + script_installed_path + ), + ) + ), + ) + + lines = [ + "if {EMPTY_DPKG_ROOT_CONDITION} && [ -x {SCRIPT_PATH} ]; then", + " update-rc.d {SCRIPT_NAME} {UPDATE_RCD_PARAMS} >/dev/null || exit 1", + ] + + if ( + service_def.auto_start_in_install + and service_def.on_upgrade != "stop-then-start" + ): + lines.append(' if [ -z "$2" ]; then') + lines.append( + " invoke-rc.d --skip-systemd-native {SCRIPT_NAME} start >/dev/null || exit 1".format( + SCRIPT_NAME=ctrl.maintscript.escape_shell_words(script_name), + ) + ) + lines.append(" fi") + + if service_def.on_upgrade in ("restart", "reload"): + lines.append(' if [ -n "$2" ]; then') + lines.append( + " invoke-rc.d --skip-systemd-native {SCRIPT_NAME} {ACTION} >/dev/null || exit 1".format( + SCRIPT_NAME=ctrl.maintscript.escape_shell_words(script_name), + 
ACTION=service_def.on_upgrade, + ) + ) + lines.append(" fi") + elif service_def.on_upgrade == "stop-then-start": + lines.append( + " invoke-rc.d --skip-systemd-native {SCRIPT_NAME} start >/dev/null || exit 1".format( + SCRIPT_NAME=ctrl.maintscript.escape_shell_words(script_name), + ) + ) + ctrl.maintscript.unconditionally_in_script( + "preinst", + textwrap.dedent( + """\ + if {EMPTY_DPKG_ROOT_CONDITION} && [ "$1" = "upgrade" ] && [ -x {SCRIPT_PATH} ]; then + invoke-rc.d --skip-systemd-native {SCRIPT_NAME} stop > /dev/null || true + fi + """.format( + EMPTY_DPKG_ROOT_CONDITION=EMPTY_DPKG_ROOT_CONDITION, + SCRIPT_PATH=ctrl.maintscript.escape_shell_words( + script_installed_path + ), + SCRIPT_NAME=ctrl.maintscript.escape_shell_words(script_name), + ) + ), + ) + elif service_def.on_upgrade != "do-nothing": + raise AssertionError( + f"Missing support for on_upgrade rule: {service_def.on_upgrade}" + ) + + lines.append("fi") + combined = "".join(x if x.endswith("\n") else f"{x}\n" for x in lines) + ctrl.maintscript.on_configure( + combined.format( + EMPTY_DPKG_ROOT_CONDITION=EMPTY_DPKG_ROOT_CONDITION, + DPKG_ROOT=DPKG_ROOT, + UPDATE_RCD_PARAMS=update_rcd_params, + SCRIPT_PATH=ctrl.maintscript.escape_shell_words(script_installed_path), + SCRIPT_NAME=ctrl.maintscript.escape_shell_words(script_name), + ) + ) + + ctrl.maintscript.on_removed( + textwrap.dedent( + """\ + if [ -x {DPKG_ROOT}{SCRIPT_PATH} ]; then + chmod -x {DPKG_ROOT}{SCRIPT_PATH} > /dev/null || true + fi + """.format( + DPKG_ROOT=DPKG_ROOT, + SCRIPT_PATH=ctrl.maintscript.escape_shell_words( + script_installed_path + ), + ) + ) + ) + ctrl.maintscript.on_purge( + textwrap.dedent( + """\ + if {EMPTY_DPKG_ROOT_CONDITION} ; then + update-rc.d {SCRIPT_NAME} remove >/dev/null + fi + """.format( + SCRIPT_NAME=ctrl.maintscript.escape_shell_words(script_name), + EMPTY_DPKG_ROOT_CONDITION=EMPTY_DPKG_ROOT_CONDITION, + ) + ) + ) + + +def detect_sysv_init_service_files( + fs_root: VirtualPath, + service_registry: 
ServiceRegistry[None], + _context: PackageProcessingContext, +) -> None: + etc_init = fs_root.lookup("/etc/init.d") + if not etc_init: + return + for path in etc_init.iterdir: + if path.is_dir or not path.is_executable: + continue + + service_registry.register_service( + path, + path.name, + ) diff --git a/src/debputy/plugin/debputy/shlib_metadata_detectors.py b/src/debputy/plugin/debputy/shlib_metadata_detectors.py new file mode 100644 index 0000000..aa28fa9 --- /dev/null +++ b/src/debputy/plugin/debputy/shlib_metadata_detectors.py @@ -0,0 +1,47 @@ +from typing import List + +from debputy import elf_util +from debputy.elf_util import ELF_LINKING_TYPE_DYNAMIC +from debputy.plugin.api import ( + VirtualPath, + PackageProcessingContext, +) +from debputy.plugin.api.impl import BinaryCtrlAccessorProvider + +SKIPPED_DEBUG_DIRS = [ + "lib", + "lib64", + "usr", + "bin", + "sbin", + "opt", + "dev", + "emul", + ".build-id", +] + +SKIP_DIRS = {f"./usr/lib/debug/{subdir}" for subdir in SKIPPED_DEBUG_DIRS} + + +def _walk_filter(fs_path: VirtualPath, children: List[VirtualPath]) -> bool: + if fs_path.path in SKIP_DIRS: + children.clear() + return False + return True + + +def detect_shlibdeps( + fs_root: VirtualPath, + ctrl: BinaryCtrlAccessorProvider, + _context: PackageProcessingContext, +) -> None: + elf_files_to_process = elf_util.find_all_elf_files( + fs_root, + walk_filter=_walk_filter, + with_linking_type=ELF_LINKING_TYPE_DYNAMIC, + ) + + if not elf_files_to_process: + return + + ctrl.dpkg_shlibdeps(elf_files_to_process) diff --git a/src/debputy/plugin/debputy/strip_non_determinism.py b/src/debputy/plugin/debputy/strip_non_determinism.py new file mode 100644 index 0000000..2f8fd39 --- /dev/null +++ b/src/debputy/plugin/debputy/strip_non_determinism.py @@ -0,0 +1,264 @@ +import dataclasses +import os.path +import re +import subprocess +from contextlib import ExitStack +from enum import IntEnum +from typing import Iterator, Optional, List, Callable, Any, Tuple, Union + 
from debputy.plugin.api import VirtualPath
from debputy.plugin.api.impl_types import PackageProcessingContextProvider
from debputy.util import xargs, _info, escape_shell, _error


class DetectionVerdict(IntEnum):
    """Outcome of asking a detection rule about a single path.

    The numeric values are ordered; the detection loop in this module compares
    verdicts with ``>``.
    """

    # The rule does not apply to the path.
    NOT_RELEVANT = 1
    # The rule needs the output of file(1) for the path before it can decide.
    NEEDS_FILE_OUTPUT = 2
    # The path should be passed to strip-nondeterminism.
    PROCESS = 3


def _file_starts_with(
    sequences: Union[bytes, Tuple[bytes, ...]]
) -> Callable[[VirtualPath], bool]:
    """Create a content check that tests whether a file starts with a magic sequence.

    :param sequences: A single byte sequence or a tuple of alternative byte sequences.
      The alternatives may have different lengths.
    :return: A callable that opens the given path in binary mode and returns `True`
      if the file content starts with any of the sequences.
    """
    if isinstance(sequences, bytes):
        longest_sequence = len(sequences)
        sequences = (sequences,)
    else:
        longest_sequence = max(len(s) for s in sequences)

    def _checker(path: VirtualPath) -> bool:
        with path.open(byte_io=True, buffering=4096) as fd:
            buffer = fd.read(longest_sequence)
        # Prefix test rather than equality: we read as many bytes as the *longest*
        # alternative, so a shorter alternative can never be equal to the buffer
        # unless the file happens to end right after it.
        return buffer.startswith(sequences)

    return _checker


def _is_javadoc_file(path: VirtualPath) -> bool:
    """Return `True` if the first 1024 characters contain the javadoc generator marker."""
    with path.open(buffering=4096) as fd:
        c = fd.read(1024)
        return "<!-- Generated by javadoc" in c


class SndDetectionRule:
    """Base class for rules deciding whether a path is sent to strip-nondeterminism."""

    def initial_verdict(self, path: VirtualPath) -> DetectionVerdict:
        """Classify the path without any file(1) output. Must be overridden."""
        raise NotImplementedError

    def file_output_verdict(
        self,
        path: VirtualPath,
        file_analysis: Optional[str],
    ) -> bool:
        """Decide based on file(1) output.

        Only rules that can return `DetectionVerdict.NEEDS_FILE_OUTPUT` are expected
        to override this; the default implementation raises `TypeError`.
        """
        raise TypeError(
            "Should not have been called or the rule forgot to implement this method"
        )


@dataclasses.dataclass(frozen=True, slots=True)
class ExtensionPlusFileOutputRule(SndDetectionRule):
    """Match on file extension, optionally refined by a pattern on the file(1) output."""

    # Extensions (including the leading dot) as returned by `os.path.splitext`.
    extensions: Tuple[str, ...]
    # When `None`, a matching extension alone is enough to process the path.
    file_pattern: Optional[re.Pattern[str]] = None

    def initial_verdict(self, path: VirtualPath) -> DetectionVerdict:
        _, ext = os.path.splitext(path.name)
        if ext not in self.extensions:
            return DetectionVerdict.NOT_RELEVANT
        if self.file_pattern is None:
            return DetectionVerdict.PROCESS
        return DetectionVerdict.NEEDS_FILE_OUTPUT

    def file_output_verdict(
        self,
        path: VirtualPath,
        file_analysis: str,
    ) -> bool:
        """Return `True` if `file_pattern` is found anywhere in `file_analysis`."""
        file_pattern = self.file_pattern
        assert file_pattern is not None
        m = file_pattern.search(file_analysis)
        return m is not None


@dataclasses.dataclass(frozen=True, slots=True)
class ExtensionPlusContentCheck(SndDetectionRule):
    """Match on file extension and then confirm via a check of the file content."""

    # Extensions (including the leading dot) as returned by `os.path.splitext`.
    extensions: Tuple[str, ...]
    # Only invoked for paths whose extension matched.
    content_check: Callable[[VirtualPath], bool]

    def initial_verdict(self, path: VirtualPath) -> DetectionVerdict:
        _, ext = os.path.splitext(path.name)
        if ext not in self.extensions:
            return DetectionVerdict.NOT_RELEVANT
        content_verdict = self.content_check(path)
        if content_verdict:
            return DetectionVerdict.PROCESS
        return DetectionVerdict.NOT_RELEVANT


class PyzipFileCheck(SndDetectionRule):
    """Detect files that start with a `#!` line followed by zip data (any extension)."""

    def _is_pyzip_file(self, path: VirtualPath) -> bool:
        with path.open(byte_io=True, buffering=4096) as fd:
            c = fd.read(32)
            if not c.startswith(b"#!"):
                return False

            # The zip local-file-header magic must begin on a new line within the
            # first 32 bytes.
            return b"\nPK\x03\x04" in c

    def initial_verdict(self, path: VirtualPath) -> DetectionVerdict:
        if self._is_pyzip_file(path):
            return DetectionVerdict.PROCESS
        return DetectionVerdict.NOT_RELEVANT


# These detection rules should be aligned with `get_normalizer_for_file` in File::StripNondeterminism.
# Note if we send a file too much, it is just bad for performance. If we send a file too little, we
# risk non-determinism in the final output.
SND_DETECTION_RULES: List[SndDetectionRule] = [
    ExtensionPlusContentCheck(
        extensions=(".a",),
        content_check=_file_starts_with(
            (
                b"!<arch>\n",
                b"!<thin>\n",
            ),
        ),
    ),
    ExtensionPlusContentCheck(
        extensions=(".png",),
        content_check=_file_starts_with(b"\x89PNG\x0D\x0A\x1A\x0A"),
    ),
    ExtensionPlusContentCheck(
        extensions=(".gz", ".dz"),
        content_check=_file_starts_with(b"\x1F\x8B"),
    ),
    ExtensionPlusContentCheck(
        extensions=(
            # .zip related
            ".zip",
            ".pk3",
            ".epub",
            ".whl",
            ".xpi",
            ".htb",
            ".zhfst",
            ".par",
            ".codadef",
            # .jar related
            ".jar",
            ".war",
            ".hpi",
            ".apk",
            ".sym",
        ),
        # NOTE(review): the first alternative carries a fifth byte (\x1F) after the
        # usual 4-byte "PK\x03\x04" magic - confirm this is intended.
        content_check=_file_starts_with(
            (
                b"PK\x03\x04\x1F",
                b"PK\x05\x06",
                b"PK\x07\x08",
            )
        ),
    ),
    ExtensionPlusContentCheck(
        extensions=(
            ".mo",
            ".gmo",
        ),
        content_check=_file_starts_with(
            (
                b"\x95\x04\x12\xde",
                b"\xde\x12\x04\x95",
            )
        ),
    ),
    ExtensionPlusContentCheck(
        extensions=(".uimage",),
        content_check=_file_starts_with(b"\x27\x05\x19\x56"),
    ),
    ExtensionPlusContentCheck(
        extensions=(".bflt",),
        content_check=_file_starts_with(b"\x62\x46\x4C\x54"),
    ),
    ExtensionPlusContentCheck(
        extensions=(".jmod",),
        content_check=_file_starts_with(b"JM"),
    ),
    ExtensionPlusContentCheck(
        extensions=(".html",),
        content_check=_is_javadoc_file,
    ),
    PyzipFileCheck(),
    ExtensionPlusFileOutputRule(
        extensions=(".cpio",),
        # XXX: Add file output check (requires the file output support)
    ),
]


def _detect_paths_with_possible_non_determinism(
    fs_root: VirtualPath,
) -> Iterator[VirtualPath]:
    """Yield every file under `fs_root` that some detection rule wants processed.

    Each file is run through `SND_DETECTION_RULES` in order; the first rule that
    answers `DetectionVerdict.PROCESS` settles the matter and the path is yielded.
    Rules answering `DetectionVerdict.NEEDS_FILE_OUTPUT` are collected per path for
    a later file(1) based check, which is not implemented yet (hence the assertion
    once the walk is complete).

    :param fs_root: Root of the file system to scan.
    :return: Iterator over the paths that should be passed to strip-nondeterminism.
    """
    needs_file_output = []
    for path in fs_root.all_paths():
        if not path.is_file:
            continue
        needs_file_output_rules = []
        process = False
        for rule in SND_DETECTION_RULES:
            verdict = rule.initial_verdict(path)
            if verdict == DetectionVerdict.PROCESS:
                process = True
                break
            # Record exactly the rules that asked for file output themselves; a
            # later rule's answer must not depend on what an earlier rule said.
            if verdict == DetectionVerdict.NEEDS_FILE_OUTPUT:
                needs_file_output_rules.append(rule)

        if process:
            yield path
        elif needs_file_output_rules:
            needs_file_output.append((path, needs_file_output_rules))

    assert not needs_file_output
    # FIXME: Implement file check


def _apply_strip_non_determinism(timestamp: str, paths: List[VirtualPath]) -> None:
    """Run `strip-nondeterminism` on the given paths.

    :param timestamp: Value passed verbatim as `--timestamp=...`.
    :param paths: The paths to process. Each path is opened for content replacement
      (via `replace_fs_path_content`) for the duration of the run.
    """
    static_cmd = [
        "strip-nondeterminism",
        f"--timestamp={timestamp}",
        "-v",
        "--normalizers=+all",
    ]
    with ExitStack() as manager:
        # All replacement contexts stay open until every command has finished.
        affected_files = [
            manager.enter_context(p.replace_fs_path_content()) for p in paths
        ]
        for cmd in xargs(static_cmd, affected_files):
            _info(
                f"Removing (possible) unnecessary non-deterministic content via: {escape_shell(*cmd)}"
            )
            try:
                subprocess.check_call(
                    cmd,
                    stdin=subprocess.DEVNULL,
                    restore_signals=True,
                )
            except subprocess.CalledProcessError:
                _error(
                    "Attempting to remove unnecessary non-deterministic content failed. Please review"
                    " the error from strip-nondeterminism above to understand what went wrong."
                )


def strip_non_determinism(
    fs_root: VirtualPath, _: Any, context: PackageProcessingContextProvider
) -> None:
    """Detect and normalize files with (possibly) non-deterministic content.

    :param fs_root: Root of the file system to process.
    :param _: Unused.
    :param context: Processing context; its manifest substitution supplies the
      timestamp via the `_DEBPUTY_SND_SOURCE_DATE_EPOCH` variable.
    """
    paths = list(_detect_paths_with_possible_non_determinism(fs_root))

    if not paths:
        _info("Detected no paths to be processed by strip-nondeterminism")
        return

    substitution = context._manifest.substitution

    source_date_epoch = substitution.substitute(
        "{{_DEBPUTY_SND_SOURCE_DATE_EPOCH}}", "Internal; strip-nondeterminism"
    )

    _apply_strip_non_determinism(source_date_epoch, paths)


# diff --git a/src/debputy/plugin/debputy/types.py b/src/debputy/plugin/debputy/types.py
# new file mode 100644
# index 0000000..dc8d0ce
# --- /dev/null
# +++ b/src/debputy/plugin/debputy/types.py
# @@ -0,0 +1,10 @@
import dataclasses

from debputy.manifest_parser.base_types import FileSystemMode


@dataclasses.dataclass(slots=True)
class DebputyCapability:
    """Record pairing a capability string with a file system mode and its origin."""

    capabilities: str
    capability_mode: FileSystemMode
    # Where the capability was defined.
    definition_source: str


# diff --git a/src/debputy/substitution.py
# b/src/debputy/substitution.py
# new file mode 100644
# index 0000000..0923d8f
# --- /dev/null
# +++ b/src/debputy/substitution.py
# @@ -0,0 +1,336 @@
import dataclasses
import os
import re
from enum import IntEnum
from typing import FrozenSet, NoReturn, Optional, Set, Mapping, TYPE_CHECKING, Self

from debputy.architecture_support import (
    dpkg_architecture_table,
    DpkgArchitectureBuildProcessValuesTable,
)
from debputy.exceptions import DebputySubstitutionError
from debputy.util import glob_escape

if TYPE_CHECKING:
    from debputy.plugin.api.feature_set import PluginProvidedFeatureSet
    from debputy.plugin.api import VirtualPath


# Matches `{{NAME}}` with optional spaces just inside the braces. The three groups
# are: the opening braces (plus spaces), the variable name and the closing braces
# (plus spaces). A name is an optional leading `_`, then alphanumeric runs joined
# by single `-`, `_` or `:` characters.
SUBST_VAR_RE = re.compile(
    r"""
    ([{][{][ ]*)

    (
        _?[A-Za-z0-9]+
        (?:[-_:][A-Za-z0-9]+)*
    )

    ([ ]*[}][}])
""",
    re.VERBOSE,
)


class VariableNameState(IntEnum):
    """Classification of a variable name as returned by `Substitution.variable_state`."""

    UNDEFINED = 1
    RESERVED = 2
    DEFINED = 3


@dataclasses.dataclass(slots=True, frozen=True)
class VariableContext:
    """Context handed to plugin-provided variables when they are resolved."""

    debian_dir: "VirtualPath"


class Substitution:
    """Base class/interface for substituting `{{VARIABLE}}` references in strings.

    The base implementation knows no variables; every lookup via `_replacement`
    fails with a `DebputySubstitutionError`.
    """

    def substitute(
        self,
        value: str,
        definition_source: str,
        /,
        escape_glob_characters: bool = False,
    ) -> str:
        """Return `value` with all variable references replaced.

        :param value: The string that may contain `{{VARIABLE}}` references.
        :param definition_source: Description of where `value` came from; used in
          error messages.
        :param escape_glob_characters: If `True`, replacement values are passed
          through `glob_escape` before being inserted.
        """
        raise NotImplementedError

    def with_extra_substitutions(self, **extra_substitutions: str) -> "Substitution":
        """Return a substitution that additionally knows the given variables."""
        raise NotImplementedError

    def with_unresolvable_substitutions(
        self, *extra_substitutions: str
    ) -> "Substitution":
        """Return a substitution in which the given variable names cannot be resolved."""
        raise NotImplementedError

    def variable_state(self, variable_name: str) -> VariableNameState:
        """Classify `variable_name`; the base class reports everything as undefined."""
        return VariableNameState.UNDEFINED

    def is_used(self, variable_name: str) -> bool:
        """Return whether `variable_name` has been used; always `False` in the base class."""
        return False

    def _mark_used(self, variable_name: str) -> None:
        """Hook called for every substituted variable; a no-op in the base class."""
        pass

    def _replacement(self, matched_key: str, definition_source: str) -> str:
        """Return the value for `matched_key`; the base class always raises via `_error`."""
        self._error(
            "Cannot resolve {{" + matched_key + "}}."
            f" The error occurred while trying to process {definition_source}"
        )

    def _error(
        self,
        msg: str,
        *,
        caused_by: Optional[BaseException] = None,
    ) -> NoReturn:
        """Raise `DebputySubstitutionError` with `msg`, chained from `caused_by`."""
        raise DebputySubstitutionError(msg) from caused_by

    def _apply_substitution(
        self,
        pattern: re.Pattern[str],
        value: str,
        definition_source: str,
        /,
        escape_glob_characters: bool = False,
    ) -> str:
        """Replace every match of `pattern` in `value` with its variable value.

        `pattern` must provide three groups (prefix, variable name, suffix) like
        `SUBST_VAR_RE` does. Every substituted variable is reported to `_mark_used`.
        """
        replacement = value
        # Matches are found in the original `value`, while the edits are applied to
        # `replacement`. `offset` is the accumulated length difference between the
        # two for everything before the current match.
        offset = 0
        for match in pattern.finditer(value):
            prefix, matched_key, suffix = match.groups()
            replacement_value = self._replacement(matched_key, definition_source)
            self._mark_used(matched_key)
            if escape_glob_characters:
                replacement_value = glob_escape(replacement_value)
            s, e = match.span()
            s += offset
            e += offset
            replacement = replacement[:s] + replacement_value + replacement[e:]
            # The matched text was prefix + key + suffix; it is now replacement_value.
            token_fluff_len = len(prefix) + len(suffix)
            offset += len(replacement_value) - len(matched_key) - token_fluff_len
        return replacement


class NullSubstitution(Substitution):
    """Substitution that returns every value unchanged."""

    def substitute(
        self,
        value: str,
        definition_source: str,
        /,
        escape_glob_characters: bool = False,
    ) -> str:
        return value

    def with_extra_substitutions(self, **extra_substitutions: str) -> "Substitution":
        return self

    def with_unresolvable_substitutions(
        self, *extra_substitutions: str
    ) -> "Substitution":
        return self


# Only the shared instance remains available under a module-level name; the class
# name itself is removed from the module namespace right after instantiation.
NULL_SUBSTITUTION = NullSubstitution()
del NullSubstitution


class SubstitutionImpl(Substitution):
    """Substitution backed by dpkg architecture values, the environment, static
    variables and plugin-provided manifest variables, with an optional parent that
    is consulted when this instance cannot resolve a variable itself.
    """

    __slots__ = (
        "_used",
        "_env",
        "_plugin_feature_set",
        "_static_variables",
        "_unresolvable_substitutions",
        "_dpkg_arch_table",
        "_parent",
        "_variable_context",
    )

    def __init__(
        self,
        /,
        plugin_feature_set: Optional["PluginProvidedFeatureSet"] = None,
        static_variables: Optional[Mapping[str, str]] = None,
        unresolvable_substitutions: FrozenSet[str] = frozenset(),
        dpkg_arch_table: Optional[DpkgArchitectureBuildProcessValuesTable] = None,
        environment: Optional[Mapping[str, str]] = None,
        parent: Optional["SubstitutionImpl"] = None,
        variable_context: Optional[VariableContext] = None,
    ) -> None:
        """Create the substitution.

        :param plugin_feature_set: Source of plugin-provided manifest variables.
        :param static_variables: Fixed name to value mapping; copied on construction.
        :param unresolvable_substitutions: Names that are reserved but cannot be
          resolved by this instance.
        :param dpkg_arch_table: Source for `DEB_*` variables; defaults to
          `dpkg_architecture_table()`.
        :param environment: Source for `env:*` variables; defaults to `os.environ`.
        :param parent: Fallback substitution for variables not resolved here.
        :param variable_context: Context for resolving plugin-provided variables;
          inherited from `parent` when omitted.
        :raises ValueError: If neither `variable_context` nor `parent` is given.
        """
        self._used: Set[str] = set()
        self._plugin_feature_set = plugin_feature_set
        self._static_variables = (
            dict(static_variables) if static_variables is not None else None
        )
        self._unresolvable_substitutions = unresolvable_substitutions
        self._dpkg_arch_table = (
            dpkg_arch_table
            if dpkg_arch_table is not None
            else dpkg_architecture_table()
        )
        self._env = environment if environment is not None else os.environ
        self._parent = parent
        if variable_context is not None:
            self._variable_context = variable_context
        elif self._parent is not None:
            self._variable_context = self._parent._variable_context
        else:
            raise ValueError(
                "variable_context is required either directly or via the parent"
            )

    def copy_for_subst_test(
        self,
        plugin_feature_set: "PluginProvidedFeatureSet",
        variable_context: VariableContext,
        *,
        extra_substitutions: Optional[Mapping[str, str]] = None,
        environment: Optional[Mapping[str, str]] = None,
    ) -> "Self":
        """Create a parent-less copy with the given feature set and variable context.

        The copy starts from this instance's static variables, overlaid with
        `extra_substitutions`. Its environment is empty unless `environment` is
        given (i.e., it does not fall back to `os.environ`).
        """
        extra_substitutions_impl = (
            dict(self._static_variables.items()) if self._static_variables else {}
        )
        if extra_substitutions:
            extra_substitutions_impl.update(extra_substitutions)
        return self.__class__(
            plugin_feature_set=plugin_feature_set,
            variable_context=variable_context,
            static_variables=extra_substitutions_impl,
            unresolvable_substitutions=self._unresolvable_substitutions,
            dpkg_arch_table=self._dpkg_arch_table,
            environment=environment if environment is not None else {},
        )

    def variable_state(self, key: str) -> VariableNameState:
        """Classify `key` as defined, reserved or undefined.

        The `DEB_` and `env:` prefixes are reserved as a whole: names with these
        prefixes are never delegated to the parent.
        """
        if key.startswith("DEB_"):
            if key in self._dpkg_arch_table:
                return VariableNameState.DEFINED
            return VariableNameState.RESERVED
        plugin_feature_set = self._plugin_feature_set
        # NOTE(review): documentation placeholders count as DEFINED here, while
        # `_replacement` refuses to resolve them - confirm this is intended.
        if (
            plugin_feature_set is not None
            and key in plugin_feature_set.manifest_variables
        ):
            return VariableNameState.DEFINED
        if key.startswith("env:"):
            k = key[4:]
            if k in self._env:
                return VariableNameState.DEFINED
            return VariableNameState.RESERVED
        if self._static_variables is not None and key in self._static_variables:
            return VariableNameState.DEFINED
        if key in self._unresolvable_substitutions:
            return VariableNameState.RESERVED
        if self._parent is not None:
            return self._parent.variable_state(key)
        return VariableNameState.UNDEFINED

    def is_used(self, variable_name: str) -> bool:
        """Return `True` if this instance or any of its parents recorded a use."""
        if variable_name in self._used:
            return True
        parent = self._parent
        if parent is not None:
            return parent.is_used(variable_name)
        return False

    def _mark_used(self, variable_name: str) -> None:
        """Record the use here and in the nearest parent that provides the variable."""
        p = self._parent
        while p:
            # Find the parent that has the variable if possible. This ensures that is_used works
            # correctly.
            if p._static_variables is not None and variable_name in p._static_variables:
                p._mark_used(variable_name)
                break
            plugin_feature_set = p._plugin_feature_set
            if (
                plugin_feature_set is not None
                and variable_name in plugin_feature_set.manifest_variables
                and not plugin_feature_set.manifest_variables[
                    variable_name
                ].is_documentation_placeholder
            ):
                p._mark_used(variable_name)
                break
            p = p._parent
        self._used.add(variable_name)

    def _replacement(self, key: str, definition_source: str) -> str:
        """Resolve `key` to its value or raise `DebputySubstitutionError`.

        Sources are tried in this order: the dpkg architecture table (`DEB_*`), the
        environment (`env:*`), static variables, plugin-provided variables, and
        finally the parent. Names listed as unresolvable fail before the parent is
        consulted.
        """
        if key.startswith("DEB_") and key in self._dpkg_arch_table:
            return self._dpkg_arch_table[key]
        if key.startswith("env:"):
            k = key[4:]
            if k in self._env:
                return self._env[k]
            self._error(
                f'The environment does not contain the variable "{key}" '
                f"(error occurred while trying to process {definition_source})"
            )

        # The order between extra_substitution and plugin_feature_set is leveraged by
        # the tests to implement mocking variables. If the order needs tweaking,
        # you will need a custom resolver for the tests to support mocking.
        static_variables = self._static_variables
        if static_variables and key in static_variables:
            return static_variables[key]
        plugin_feature_set = self._plugin_feature_set
        if plugin_feature_set is not None:
            provided_var = plugin_feature_set.manifest_variables.get(key)
            if (
                provided_var is not None
                and not provided_var.is_documentation_placeholder
            ):
                v = provided_var.resolve(self._variable_context)
                # cache it for next time.
                if static_variables is None:
                    static_variables = {}
                    self._static_variables = static_variables
                static_variables[key] = v
                return v
        if key in self._unresolvable_substitutions:
            self._error(
                "The variable {{" + key + "}}"
                f" is not available while processing {definition_source}."
            )
        parent = self._parent
        if parent is not None:
            return parent._replacement(key, definition_source)
        self._error(
            "Cannot resolve {{" + key + "}}: it is not a known key."
            f" The error occurred while trying to process {definition_source}"
        )

    def with_extra_substitutions(self, **extra_substitutions: str) -> "Substitution":
        """Return a child substitution that also knows `extra_substitutions`.

        Returns `self` when no extra substitutions are given. The child has no
        plugin feature set of its own; plugin-provided variables are resolved via
        the parent chain.
        """
        if not extra_substitutions:
            return self
        return SubstitutionImpl(
            dpkg_arch_table=self._dpkg_arch_table,
            environment=self._env,
            static_variables=extra_substitutions,
            parent=self,
        )

    def with_unresolvable_substitutions(
        self,
        *extra_substitutions: str,
    ) -> "Substitution":
        """Return a child substitution in which the given names cannot be resolved.

        Returns `self` when no names are given.
        """
        if not extra_substitutions:
            return self
        return SubstitutionImpl(
            dpkg_arch_table=self._dpkg_arch_table,
            environment=self._env,
            unresolvable_substitutions=frozenset(extra_substitutions),
            parent=self,
        )

    def substitute(
        self,
        value: str,
        definition_source: str,
        /,
        escape_glob_characters: bool = False,
    ) -> str:
        """Return `value` with every `{{VARIABLE}}` reference replaced.

        :raises DebputySubstitutionError: If a referenced variable cannot be resolved.
        """
        # Fast path: without "{{" there cannot be any variable reference.
        if "{{" not in value:
            return value
        return self._apply_substitution(
            SUBST_VAR_RE,
            value,
            definition_source,
            escape_glob_characters=escape_glob_characters,
        )


# diff --git a/src/debputy/transformation_rules.py b/src/debputy/transformation_rules.py
# new file mode 100644
index 0000000..8d9caae --- /dev/null +++ b/src/debputy/transformation_rules.py @@ -0,0 +1,596 @@ +import dataclasses +import os +from typing import ( + NoReturn, + Optional, + Callable, + Sequence, + Tuple, + List, + Literal, + Dict, + TypeVar, + cast, +) + +from debputy.exceptions import ( + DebputyRuntimeError, + PureVirtualPathError, + TestPathWithNonExistentFSPathError, +) +from debputy.filesystem_scan import FSPath +from debputy.interpreter import ( + extract_shebang_interpreter_from_file, +) +from debputy.manifest_conditions import ConditionContext, ManifestCondition +from debputy.manifest_parser.base_types import ( + FileSystemMode, + StaticFileSystemOwner, + StaticFileSystemGroup, + DebputyDispatchableType, +) +from debputy.manifest_parser.util import AttributePath +from debputy.path_matcher import MatchRule +from debputy.plugin.api import VirtualPath +from debputy.plugin.debputy.types import DebputyCapability +from debputy.util import _warn + + +class TransformationRuntimeError(DebputyRuntimeError): + pass + + +CreateSymlinkReplacementRule = Literal[ + "error-if-exists", + "error-if-directory", + "abort-on-non-empty-directory", + "discard-existing", +] + + +VP = TypeVar("VP", bound=VirtualPath) + + +@dataclasses.dataclass(frozen=True, slots=True) +class PreProvidedExclusion: + tag: str + description: str + pruner: Callable[[FSPath], None] + + +class TransformationRule(DebputyDispatchableType): + __slots__ = () + + def transform_file_system( + self, fs_root: FSPath, condition_context: ConditionContext + ) -> None: + raise NotImplementedError + + def _evaluate_condition( + self, + condition: Optional[ManifestCondition], + condition_context: ConditionContext, + result_if_condition_is_missing: bool = True, + ) -> bool: + if condition is None: + return result_if_condition_is_missing + return condition.evaluate(condition_context) + + def _error( + self, + msg: str, + *, + caused_by: Optional[BaseException] = None, + ) -> NoReturn: + raise 
TransformationRuntimeError(msg) from caused_by + + def _match_rule_had_no_matches( + self, match_rule: MatchRule, definition_source: str + ) -> NoReturn: + self._error( + f'The match rule "{match_rule.describe_match_short()}" in transformation "{definition_source}" did' + " not match any paths. Either the definition is redundant (and can be omitted) or the match rule is" + " incorrect." + ) + + def _fs_path_as_dir( + self, + path: VP, + definition_source: str, + ) -> VP: + if path.is_dir: + return path + path_type = "file" if path.is_file else 'symlink/"special file system object"' + self._error( + f"The path {path.path} was expected to be a directory (or non-existing) due to" + f" {definition_source}. However that path existed and is a {path_type}." + f" You may need a `remove: {path.path}` prior to {definition_source} to" + " to make this transformation succeed." + ) + + def _ensure_is_directory( + self, + fs_root: FSPath, + path_to_directory: str, + definition_source: str, + ) -> FSPath: + current, missing_parts = fs_root.attempt_lookup(path_to_directory) + current = self._fs_path_as_dir(cast("FSPath", current), definition_source) + if missing_parts: + return current.mkdirs("/".join(missing_parts)) + return current + + +class RemoveTransformationRule(TransformationRule): + __slots__ = ( + "_match_rules", + "_keep_empty_parent_dirs", + "_definition_source", + ) + + def __init__( + self, + match_rules: Sequence[MatchRule], + keep_empty_parent_dirs: bool, + definition_source: AttributePath, + ) -> None: + self._match_rules = match_rules + self._keep_empty_parent_dirs = keep_empty_parent_dirs + self._definition_source = definition_source.path + + def transform_file_system( + self, + fs_root: FSPath, + condition_context: ConditionContext, + ) -> None: + matched_any = False + for match_rule in self._match_rules: + # Fully resolve the matches to avoid RuntimeError caused by collection changing size as a + # consequence of the removal: 
class MoveTransformationRule(TransformationRule):
    """Transformation that moves (renames) the paths matched by a match rule.

    When the destination is (or is declared to be) a directory, every match is
    re-parented into that directory.  Otherwise exactly one match is allowed
    and it is renamed to the destination path.
    """

    __slots__ = (
        "_match_rule",
        "_dest_path",
        "_dest_is_dir",
        "_definition_source",
        "_condition",
    )

    def __init__(
        self,
        match_rule: MatchRule,
        dest_path: str,
        dest_is_dir: bool,
        definition_source: AttributePath,
        condition: Optional[ManifestCondition],
    ) -> None:
        """Store the rule parameters.

        :param match_rule: Rule selecting the source path(s) to move
        :param dest_path: Destination path of the move
        :param dest_is_dir: Whether the destination must be treated as a directory
        :param definition_source: Where the rule was defined (only its ``path`` is kept)
        :param condition: Optional condition guarding the transformation
        """
        self._match_rule = match_rule
        self._dest_path = dest_path
        self._dest_is_dir = dest_is_dir
        self._definition_source = definition_source.path
        self._condition = condition

    def transform_file_system(
        self, fs_root: FSPath, condition_context: ConditionContext
    ) -> None:
        """Apply the move to ``fs_root`` if the condition evaluates to true."""
        if not self._evaluate_condition(self._condition, condition_context):
            return
        # Eager resolve is necessary to avoid "self-recursive" matching in special cases (e.g., **/*.la)
        matches = list(self._match_rule.finditer(fs_root))
        if not matches:
            self._match_rule_had_no_matches(self._match_rule, self._definition_source)

        target_dir: Optional[VirtualPath]
        if self._dest_is_dir:
            target_dir = self._ensure_is_directory(
                fs_root,
                self._dest_path,
                self._definition_source,
            )
        else:
            dir_part, basename = os.path.split(self._dest_path)
            target_parent_dir = self._ensure_is_directory(
                fs_root,
                dir_part,
                self._definition_source,
            )
            target_dir = target_parent_dir.get(basename)

        if target_dir is None or not target_dir.is_dir:
            # Plain rename of a single path.
            # NOTE(review): this branch relies on `target_parent_dir` / `basename`, which are
            # only bound when `_dest_is_dir` is false; presumably `_ensure_is_directory` always
            # returns a directory in the other case - confirm.
            if len(matches) > 1:
                self._error(
                    f"Could not rename {self._match_rule.describe_match_short()} to {self._dest_path}"
                    f" (from: {self._definition_source}). Multiple paths matched the pattern and the"
                    " destination was not a directory. Either correct the pattern to only match one source"
                    " OR define the destination to be a directory (E.g., add a trailing slash - example:"
                    f' "{self._dest_path}/")'
                )
            p = matches[0]
            if p.path == self._dest_path:
                self._error(
                    f"Error in {self._definition_source}, the source"
                    f" {self._match_rule.describe_match_short()} matched {self._dest_path} making the"
                    " rename redundant!?"
                )
            p.parent_dir = target_parent_dir
            p.name = basename
            return

        assert target_dir is not None and target_dir.is_dir
        basenames: Dict[str, VirtualPath] = dict()
        target_dir_path = target_dir.path

        for m in matches:
            if m.path == target_dir_path:
                self._error(
                    f"Error in {self._definition_source}, the source {self._match_rule.describe_match_short()}"
                    f" matched {self._dest_path} (among other), but it is not possible to copy a directory into"
                    " itself"
                )
            if m.name in basenames:
                alt_path = basenames[m.name]
                # We document "two *distinct*" paths. However, as the glob matches are written, it should not be
                # possible for a *single* glob to match the same path twice.
                assert alt_path is not m
                self._error(
                    f"Could not rename {self._match_rule.describe_match_short()} to {self._dest_path}"
                    f" (from: {self._definition_source}). Multiple paths matched the pattern had the"
                    f' same basename "{m.name}" ("{m.path}" vs. "{alt_path.path}"). Please correct the'
                    f" pattern, so it only matches one path with that basename to avoid this conflict."
                )
            # The conflict check must look inside the *destination* directory; looking up
            # `m.name` below `m` itself can never find the directory that would be replaced.
            existing = target_dir.get(m.name)
            if existing and existing.is_dir:
                self._error(
                    f"Could not rename {self._match_rule.describe_match_short()} to {self._dest_path}"
                    f" (from: {self._definition_source}). The pattern matched {m.path} which would replace"
                    f" the existing directory {existing.path}. If this replacement is intentional, then please"
                    f' remove "{existing.path}" first (e.g., via `- remove: "{existing.path}"`)'
                )
            basenames[m.name] = m
            m.parent_dir = target_dir
def _apply_owner_and_mode(
    path: VirtualPath,
    owner: Optional[StaticFileSystemOwner],
    group: Optional[StaticFileSystemGroup],
    mode: Optional[FileSystemMode],
    capabilities: Optional[str],
    capability_mode: Optional[FileSystemMode],
    definition_source: str,
) -> None:
    """Apply ownership, mode and (for files) capabilities to a single path.

    :param path: The path to update
    :param owner: New owner, or None to leave it to ``chown``'s handling of None
    :param group: New group, or None to leave it to ``chown``'s handling of None
    :param mode: Mode specification used to compute the new mode, or None to skip
    :param capabilities: Capabilities to record on files, or None to skip
    :param capability_mode: Mode paired with the capabilities (required when
      capabilities are applied)
    :param definition_source: Where the request came from (used in messages)
    :raises TransformationRuntimeError: If the desired mode cannot be computed
    """
    # chown is only invoked when at least one of owner/group was requested.
    if not (owner is None and group is None):
        path.chown(owner, group)

    if mode is not None:
        try:
            new_mode = mode.compute_mode(path.mode, path.is_dir)
        except ValueError as e:
            raise TransformationRuntimeError(
                f"Could not compute desired mode for {path.path} as"
                f" requested in {definition_source}: {e.args[0]}"
            ) from e
        path.mode = new_mode

    # Capabilities are only recorded for regular files.
    if not path.is_file or capabilities is None:
        return

    cap_ref = path.metadata(DebputyCapability)
    previous = cap_ref.value
    if previous is not None:
        _warn(
            f"Replacing the capabilities set on path {path.path} from {previous.definition_source} due"
            f" to {definition_source}."
        )
    assert capability_mode is not None
    cap_ref.value = DebputyCapability(
        capabilities,
        capability_mode,
        definition_source,
    )
None: + if not self._evaluate_condition(self._condition, condition_context): + return + owner = self._owner + group = self._group + mode = self._mode + capabilities = self._capabilities + capability_mode = self._capability_mode + definition_source = self._definition_source + d: Optional[List[FSPath]] = [] if self._recursive else None + needs_file_match = False + if self._owner is not None or self._group is not None or self._mode is not None: + needs_file_match = True + + for match_rule in self._match_rules: + match_ok = False + saw_symlink = False + saw_directory = False + + for path in match_rule.finditer(fs_root): + if path.is_symlink: + saw_symlink = True + continue + if path.is_file or not needs_file_match: + match_ok = True + if path.is_dir: + saw_directory = True + if not match_ok and needs_file_match and self._recursive: + match_ok = any(p.is_file for p in path.all_paths()) + _apply_owner_and_mode( + path, + owner, + group, + mode, + capabilities, + capability_mode, + definition_source, + ) + if path.is_dir and d is not None: + d.append(path) + + if not match_ok: + if needs_file_match and (saw_directory or saw_symlink): + _warn( + f"The match rule {match_rule.describe_match_short()} (from {self._definition_source})" + " did not match any files, but given the attributes it can only apply to files." + ) + elif saw_symlink: + _warn( + f"The match rule {match_rule.describe_match_short()} (from {self._definition_source})" + ' matched symlinks, but "path-metadata" cannot apply to symlinks.' 
class ModeNormalizationTransformationRule(TransformationRule):
    """Built-in rule that normalizes the mode of matched paths.

    Each path is normalized at most once: the first (match rule, mode) pair
    that matches a path wins, and symlinks are never touched.
    """

    __slots__ = ("_normalizations",)

    def __init__(
        self,
        normalizations: Sequence[Tuple[MatchRule, FileSystemMode]],
    ) -> None:
        """:param normalizations: Ordered (match rule, mode) pairs to apply"""
        self._normalizations = normalizations

    def transform_file_system(
        self,
        fs_root: FSPath,
        condition_context: ConditionContext,
    ) -> None:
        """Apply every normalization to ``fs_root`` (the condition context is unused)."""
        handled = set()

        def already_handled(candidate) -> bool:
            # Lets the matcher skip paths a previous normalization has claimed.
            return candidate.path in handled

        for rule, mode_spec in self._normalizations:
            for entry in rule.finditer(fs_root, ignore_paths=already_handled):
                if entry.is_symlink:
                    continue
                if entry.path in handled:
                    continue
                handled.add(entry.path)
                try:
                    new_mode = mode_spec.compute_mode(entry.mode, entry.is_dir)
                except ValueError as e:
                    raise AssertionError(
                        "Error while applying built-in mode normalization rule"
                    ) from e
                entry.mode = new_mode
debputy.filesystem_scan import FSPath + + +VP = TypeVar("VP", "VirtualPath", "FSPath") +S = TypeVar("S", str, bytes) diff --git a/src/debputy/util.py b/src/debputy/util.py new file mode 100644 index 0000000..4da2772 --- /dev/null +++ b/src/debputy/util.py @@ -0,0 +1,804 @@ +import argparse +import collections +import functools +import glob +import logging +import os +import re +import shutil +import subprocess +import sys +import time +from itertools import zip_longest +from pathlib import Path +from typing import ( + NoReturn, + TYPE_CHECKING, + Union, + Set, + FrozenSet, + Optional, + TypeVar, + Dict, + Iterator, + Iterable, + Literal, + Tuple, + Sequence, + List, + Mapping, + Any, +) + +from debian.deb822 import Deb822 + +from debputy.architecture_support import DpkgArchitectureBuildProcessValuesTable +from debputy.exceptions import DebputySubstitutionError + +if TYPE_CHECKING: + from debputy.packages import BinaryPackage + from debputy.substitution import Substitution + + +T = TypeVar("T") + + +SLASH_PRUNE = re.compile("//+") +PKGNAME_REGEX = re.compile(r"[a-z0-9][-+.a-z0-9]+", re.ASCII) +PKGVERSION_REGEX = re.compile( + r""" + (?: \d+ : )? 
def assume_not_none(x: Optional[T]) -> T:
    """Return ``x`` unchanged, raising ValueError if it is None.

    :param x: A value the caller believes is not None
    :return: The same value
    :raises ValueError: If ``x`` is None
    """
    if x is not None:
        return x
    raise ValueError(  # pragma: no cover
        'Internal error: None was given, but the receiver assumed "not None" here'
    )
def _normalize_path(path: str, with_prefix: bool = True) -> str:
    """Normalize a path, optionally with a leading "./".

    Leading and trailing slashes are stripped; an empty result or "." yields
    ".".  Paths containing "//" or "." are passed through `_clean_path`.

    :param path: The path to normalize
    :param with_prefix: If True the result starts with "./", otherwise any
      leading "./" is removed
    :return: The normalized path
    """
    path = path.strip("/")
    if path in ("", "."):
        return "."
    if "//" in path or "." in path:
        path = _clean_path(path)
    has_prefix = path.startswith("./")
    if with_prefix and not has_prefix:
        return "./" + path
    if has_prefix and not with_prefix:
        return path[2:]
    return path
def _escape_shell_word(w: str) -> str:
    """Escape a single word for display as part of a shell command line.

    Words containing whitespace are wrapped in double quotes (with the
    characters matched by `_DOUBLE_ESCAPEES` backslash-escaped); all other
    words get the characters matched by `_REGULAR_ESCAPEES` backslash-escaped.

    :param w: The word to escape
    :return: The escaped word
    """
    # `search` rather than `match`: `match` is anchored at the start of the word and
    # would only detect whitespace in the very first character.
    if _SPACE_RE.search(w):
        w = _DOUBLE_ESCAPEES.sub(_backslash_escape, w)
        return f'"{w}"'
    return _REGULAR_ESCAPEES.sub(_backslash_escape, w)
def glob_escape(replacement_value: str) -> str:
    """Escape glob metacharacters so the value only matches itself literally.

    Each of ``[ ] * ? { }`` is wrapped in a bracket expression (``*`` becomes
    ``[*]``).  Values without glob magic and without "{" are returned as-is.

    :param replacement_value: The literal text to embed in a glob pattern
    :return: The escaped text
    """
    # Nothing to do only when there is neither regular glob magic nor a brace.
    if not glob.has_magic(replacement_value) and "{" not in replacement_value:
        return replacement_value
    # A single-pass translation is required here: chained `.replace()` calls would
    # re-escape the "]" that was just introduced when escaping "[".
    escape_table = str.maketrans({c: f"[{c}]" for c in "[]*?{}"})
    return replacement_value.translate(escape_table)
def resolve_source_date_epoch(
    command_line_value: Optional[int],
    *,
    substitution: Optional["Substitution"] = None,
) -> int:
    """Determine SOURCE_DATE_EPOCH and export it to the environment.

    Sources are tried in order: the command line value, the
    ``SOURCE_DATE_EPOCH`` environment variable, the ``{{SOURCE_DATE_EPOCH}}``
    substitution (if a substitution is given) and finally the current time.
    The resolved value is always written back to ``os.environ``.

    :param command_line_value: Explicit value, or None to resolve it
    :param substitution: Optional substitution used as a fallback source
    :return: The resolved timestamp
    """
    epoch = command_line_value

    if epoch is None:
        env_value = os.environ.get("SOURCE_DATE_EPOCH")
        if env_value is not None:
            if env_value == "":
                _error("SOURCE_DATE_EPOCH is set but empty.")
            epoch = int(env_value)

    if epoch is None and substitution is not None:
        try:
            substituted = substitution.substitute(
                "{{SOURCE_DATE_EPOCH}}",
                "Internal resolution",
            )
            epoch = int(substituted)
        except (DebputySubstitutionError, ValueError):
            # Best effort only; fall through to the wall clock below.
            pass

    if epoch is None:
        epoch = int(time.time())

    os.environ["SOURCE_DATE_EPOCH"] = str(epoch)
    return epoch
def generated_content_dir(
    *,
    package: Optional["BinaryPackage"] = None,
    subdir_key: Optional[str] = None,
) -> str:
    """Return (creating it if needed) a directory for generated content.

    The directory lives below ``scratch_dir()`` in a per-process container
    (named after the pid), under ``generated-fs-content`` and then either
    ``pkg_<name>`` or ``no-package``, optionally followed by ``subdir_key``.

    :param package: Package the content belongs to, if any
    :param subdir_key: Optional extra sub-directory
    :return: Path to the (existing) directory
    """
    global _RUNTIME_CONTAINER_DIR_KEY
    first_run = _RUNTIME_CONTAINER_DIR_KEY is None
    if first_run:
        _RUNTIME_CONTAINER_DIR_KEY = f"_pb-{os.getpid()}"

    container_root = os.path.join(scratch_dir(), _RUNTIME_CONTAINER_DIR_KEY)

    if first_run and os.path.isdir(container_root):
        # In the unlikely case there is a re-run with exactly the same pid, `debputy` should not
        # see "stale" data.
        # TODO: Ideally, we would always clean up this directory on failure, but `atexit` is not
        #  reliable enough for that and we do not have an obvious hook for it.
        shutil.rmtree(container_root)

    segments = [
        container_root,
        "generated-fs-content",
        f"pkg_{package.name}" if package else "no-package",
    ]
    if subdir_key is not None:
        segments.append(subdir_key)
    directory = os.path.join(*segments)

    os.makedirs(directory, exist_ok=True)
    return directory
def _split_xargs_args(
    static_cmd: Sequence[str],
    max_args_byte_len: int,
    varargs: Iterable[str],
    reuse_list_ok: bool,
) -> Iterator[List[str]]:
    """Split ``varargs`` into command lines that respect a byte budget.

    Every yielded list starts with ``static_cmd`` followed by as many of the
    variable arguments as fit within ``max_args_byte_len`` (each argument
    costs its UTF-8 length plus one for the separating space).

    :param static_cmd: The fixed leading part of every command
    :param max_args_byte_len: Byte budget available for the variable arguments
    :param varargs: The variable arguments to distribute
    :param reuse_list_ok: If True the same list object is reused between
      yields (the consumer must not keep a reference to it)
    :return: An iterator over the command lines
    :raises ValueError: If a single argument does not fit in the budget
    """
    static_cmd_len = len(static_cmd)
    remaining_len = max_args_byte_len
    pending_args = list(static_cmd)
    for arg in varargs:
        arg_len = len(arg.encode("utf-8")) + 1  # +1 for leading space
        remaining_len -= arg_len
        # The budget is exhausted once it goes negative. Testing for "exactly zero"
        # would almost never trigger and let the command line grow without bound.
        if remaining_len < 0:
            if len(pending_args) <= static_cmd_len:
                raise ValueError(
                    f"Could not fit a single argument into the command line !?"
                    f" {max_args_byte_len} (variable argument limit) < {arg_len} (argument length)"
                )
            yield pending_args
            remaining_len = max_args_byte_len - arg_len
            if reuse_list_ok:
                pending_args.clear()
                pending_args.extend(static_cmd)
            else:
                pending_args = list(static_cmd)
        pending_args.append(arg)

    if len(pending_args) > static_cmd_len:
        yield pending_args
def _check_color() -> Tuple[bool, bool, Optional[str]]:
    """Decide whether stdout and stderr output should be colorized.

    The request is read from ``DEBPUTY_COLORS``, falling back to
    ``DPKG_COLORS`` and then to "never" when ``NO_COLOR`` is set or "auto"
    otherwise.  Unknown values are treated as "auto" and reported back.

    :return: A tuple of (color on stdout, color on stderr, the invalid
      request value or None if the request was valid)
    """
    dpkg_or_default = os.environ.get(
        "DPKG_COLORS", "never" if "NO_COLOR" in os.environ else "auto"
    )
    requested_color = os.environ.get("DEBPUTY_COLORS", dpkg_or_default)
    bad_request = None
    if requested_color not in {"auto", "always", "never"}:
        bad_request = requested_color
        requested_color = "auto"

    if requested_color == "auto":
        # Each stream is probed on its own; they can be redirected independently.
        stdout_color = sys.stdout.isatty()
        stderr_color = sys.stderr.isatty()
    else:
        enable = requested_color == "always"
        stdout_color = enable
        stderr_color = enable
    return stdout_color, stderr_color, bad_request
"""Whether these two packages can do content cross-checks + + :param pkg_a: The first package + :param pkg_b: The second package + :return: A tuple if two booleans. If the first is True, then binary_package_a may do content cross-checks + that invoĺves binary_package_b. If the second is True, then binary_package_b may do content cross-checks + that involves binary_package_a. Both can be True and both can be False at the same time, which + happens in common cases (arch:all + arch:any cases both to be False as a common example). + """ + + # Handle the two most obvious base-cases + if not pkg_a.should_be_acted_on or not pkg_b.should_be_acted_on: + return False, False + if pkg_a.is_arch_all ^ pkg_b.is_arch_all: + return False, False + + a_may_see_b = True + b_may_see_a = True + + a_bp = pkg_a.fields.get("Build-Profiles", "") + b_bp = pkg_b.fields.get("Build-Profiles", "") + + if a_bp != b_bp: + a_bp_set = _parse_build_profiles(a_bp) if a_bp != "" else frozenset() + b_bp_set = _parse_build_profiles(b_bp) if b_bp != "" else frozenset() + + # Check for build profiles being identically but just ordered differently. + if a_bp_set != b_bp_set: + # For simplicity, we let groups cancel each other out. If one side has no clauses + # left, then it will always be built when the other is built. + # + # Eventually, someone will be here with a special case where more complex logic is + # required. Good luck to you! Remember to add test cases for it (the existing logic + # has some for a reason and if the logic is going to be more complex, it will need + # tests cases to assert it fixes the problem and does not regress) + if a_bp_set - b_bp_set: + a_may_see_b = False + if b_bp_set - a_bp_set: + b_may_see_a = False + + if pkg_a.declared_architecture != pkg_b.declared_architecture: + # Also here we could do a subset check, but wildcards vs. 
def setup_logging(
    *, log_only_to_stderr: bool = False, reconfigure_logging: bool = False
) -> None:
    """Configure the root logger and the module's default logger.

    Two handlers are installed on the root logger: one for records below
    WARN (written to stdout, or to stderr when ``log_only_to_stderr`` is set)
    and one for WARN and above (always written to stderr).  Colored output
    is used when `_check_color` requests it and ``colorlog`` can be imported.

    :param log_only_to_stderr: Send all log output to stderr
    :param reconfigure_logging: Allow calling this again; previously
      installed handlers are replaced
    :raises RuntimeError: If logging is already set up and
      ``reconfigure_logging`` is False
    """
    global _LOGGING_SET_UP, _DEFAULT_LOGGER, _STDOUT_HANDLER, _STDERR_HANDLER
    if _LOGGING_SET_UP and not reconfigure_logging:
        raise RuntimeError(
            "Logging has already been configured."
            " Use reconfigure_logging=True if you need to reconfigure it"
        )
    stdout_color, stderr_color, bad_request = _check_color()

    if stdout_color or stderr_color:
        try:
            import colorlog
        except ImportError:
            stdout_color = False
            stderr_color = False

    if log_only_to_stderr:
        stdout = sys.stderr
        stdout_color = stderr_color
    else:
        # Without the stderr-only request, low-severity records belong on stdout.
        stdout = sys.stdout

    class LogLevelFilter(logging.Filter):
        """Pass records at/above (``above=True``) or strictly below a threshold."""

        def __init__(self, threshold: int, above: bool):
            super().__init__()
            self.threshold = threshold
            self.above = above

        def filter(self, record: logging.LogRecord) -> bool:
            if self.above:
                return record.levelno >= self.threshold
            else:
                return record.levelno < self.threshold

    color_format = (
        "{bold}{name}{reset}: {bold}{log_color}{levelnamelower}{reset}: {message}"
    )
    colorless_format = "{name}: {levelnamelower}: {message}"

    def _make_handler(stream: Any, use_color: bool) -> logging.Handler:
        # `use_color` can only be true when the `colorlog` import above succeeded.
        if use_color:
            handler = colorlog.StreamHandler(stream)
            handler.setFormatter(
                colorlog.ColoredFormatter(color_format, style="{", force_color=True)
            )
        else:
            handler = logging.StreamHandler(stream)
            handler.setFormatter(logging.Formatter(colorless_format, style="{"))
        return handler

    root_logger = logging.getLogger()

    # Replace the handlers from a previous configuration; each one is guarded by
    # its own "existing" handler.
    stdout_handler = _make_handler(stdout, stdout_color)
    if _STDOUT_HANDLER is not None:
        root_logger.removeHandler(_STDOUT_HANDLER)
    _STDOUT_HANDLER = stdout_handler
    root_logger.addHandler(stdout_handler)

    stderr_handler = _make_handler(sys.stderr, stderr_color)
    if _STDERR_HANDLER is not None:
        root_logger.removeHandler(_STDERR_HANDLER)
    _STDERR_HANDLER = stderr_handler
    root_logger.addHandler(stderr_handler)

    stdout_handler.addFilter(LogLevelFilter(logging.WARN, False))
    stderr_handler.addFilter(LogLevelFilter(logging.WARN, True))

    name = program_name()

    old_factory = logging.getLogRecordFactory()

    def record_factory(
        *args: Any, **kwargs: Any
    ) -> logging.LogRecord:  # pragma: no cover
        # The formats above reference `levelnamelower`, which is not a standard attribute.
        record = old_factory(*args, **kwargs)
        record.levelnamelower = record.levelname.lower()
        return record

    logging.setLogRecordFactory(record_factory)

    root_logger.setLevel(logging.INFO)
    _DEFAULT_LOGGER = logging.getLogger(name)

    if bad_request:
        _DEFAULT_LOGGER.warning(
            f'Invalid color request for "{bad_request}" in either DEBPUTY_COLORS or DPKG_COLORS.'
            ' Resetting to "auto".'
        )

    _LOGGING_SET_UP = True