1096 lines
41 KiB
Python
Executable file
1096 lines
41 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
#
|
|
# Copyright 2020 Johannes Schauer Marin Rodrigues <josch@debian.org>
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
# in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in
|
|
# all copies or substantial portions of the Software.
|
|
|
|
# snapshot.d.o API feature requests:
|
|
#
|
|
# Currently, the API does not allow to list all dates for which a snapshot
|
|
# was made. This would be useful to allow skipping snapshots. Currently we
|
|
# blindly bisect but without knowing which date on snapshot.d.o a given
|
|
# timestamp resolves to, we cannot mark it as untestable (see feature request
|
|
# above) and without a list of testable timestamps we cannot reliably test
|
|
# a timestamp before and after the one to skip.
|
|
# See also: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=969603
|
|
#
|
|
# It would be useful to know when a package version was first seen in a given
|
|
# suite. Without this knowledge we cannot reliably pick the snapshot timestamps
|
|
# at which we want to test a given suite. For example, a package version might
|
|
# appear in experimental long before it appears in unstable or any other suite
|
|
# that is to be tested. Thus, the first_seen attribute of the snapshot API is
|
|
# not very useful to determine a limited set of timestamps to test.
|
|
# See also: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=806329
|
|
|
|
# wishlist features
|
|
# -----------------
|
|
#
|
|
# - restrict set of tested snapshot timestamps to those where a given package
|
|
# set actually changed (needs a resolution to #806329)
|
|
#
|
|
# - allow marking snapshot timestamps as skippable, for example via script
|
|
# exit code (needs resolution to #969603)
|
|
#
|
|
# - add convenience function which builds a given source package and installs
|
|
# its build dependencies automatically
|
|
|
|
# complains about log_message cannot be fixed because the original function
|
|
# names one of its arguments "format" which is also forbidden...
|
|
# pylint: disable=W0221
|
|
#
|
|
# pylint complains about too many branches but the code would not become more
|
|
# readable by spreading it out across more functions
|
|
# pylint: disable=R0912
|
|
#
|
|
# allow more than 1000 lines in this file
|
|
# pylint: disable=C0302
|
|
#
|
|
# TODO: Address invalid names
|
|
# pylint: disable=invalid-name
|
|
|
|
import argparse
|
|
import atexit
|
|
import collections
|
|
import email.utils
|
|
import http.client
|
|
import http.server
|
|
import io
|
|
import logging
|
|
import lzma
|
|
import math
|
|
import os
|
|
import pathlib
|
|
import pty
|
|
import re
|
|
import select
|
|
import subprocess
|
|
import sys
|
|
from datetime import date, datetime, time, timedelta, timezone
|
|
|
|
import debian
|
|
import debian.deb822
|
|
import requests
|
|
|
|
from devscripts.proxy import setupcache
|
|
|
|
HAVE_DATEUTIL = True
|
|
try:
|
|
import dateutil.parser
|
|
except ImportError:
|
|
HAVE_DATEUTIL = False
|
|
|
|
HAVE_PARSEDATETIME = True
|
|
try:
|
|
import parsedatetime
|
|
except ImportError:
|
|
HAVE_PARSEDATETIME = False
|
|
|
|
DINSTALLRATE = 21600
|
|
|
|
|
|
def format_timestamp(timestamp):
    """Render *timestamp* in the format used by snapshot.debian.org URLs."""
    snapshot_fmt = "%Y%m%dT%H%M%SZ"
    return timestamp.strftime(snapshot_fmt)
|
|
|
|
|
|
# We utilize the fact that the HTTP interface of snapshot.d.o responds with a
|
|
# HTTP 301 redirect to the corresponding timestamp.
|
|
# It would be better if there was an officially documented API function:
|
|
# http://bugs.debian.org/969605
|
|
def sanitize_timestamp(timestamp):
    """Resolve *timestamp* onto the snapshot.debian.org timestamp serving it.

    A HEAD request for the timestamp's archive directory either succeeds
    (HTTP 200: a snapshot with exactly this timestamp exists) or redirects
    to the closest existing snapshot.  In the redirect case the canonical
    timestamp is parsed out of the Location header and returned as an
    aware datetime; otherwise the input is returned unchanged.
    """
    path = "/archive/debian/"
    connection = http.client.HTTPConnection("snapshot.debian.org")
    connection.request("HEAD", path + timestamp.strftime("%Y%m%dT%H%M%SZ") + "/")
    response = connection.getresponse()
    if response.status == 200:
        return timestamp
    # anything other than success must be a redirect (moved permanently
    # or temporarily)
    assert response.status in (301, 302), response.status
    location = response.headers["Location"]
    prefix = f"http://snapshot.debian.org{path}"
    assert location.startswith(prefix) or location.startswith(path), location
    # the redirect target is either a full URL or a relative path, both
    # ending in /archive/debian/<timestamp>/ -- extract <timestamp>
    canonical = location.rsplit("/", 2)[-2]
    return datetime.strptime(canonical, "%Y%m%dT%H%M%S%z")
|
|
|
|
|
|
def srcpkgversions_by_timestamp(srcpkgname, timestamp, suite):
    """Return the set of versions of source package *srcpkgname* present in
    *suite* at the given snapshot *timestamp*.

    Downloads and parses the Sources.xz index of the snapshot; the result
    may be empty if the package is absent at that timestamp.
    """
    ts = timestamp.strftime("%Y%m%dT%H%M%SZ")
    url = (
        f"http://snapshot.debian.org/archive/debian/{ts}"
        f"/dists/{suite}/main/source/Sources.xz"
    )
    response = requests.get(url, timeout=60)
    sources = lzma.decompress(response.content)
    return {
        debian.debian_support.Version(stanza["Version"])
        for stanza in debian.deb822.Sources.iter_paragraphs(io.BytesIO(sources))
        if stanza["Package"] == srcpkgname
    }
|
|
|
|
|
|
def binpkgversion_by_timestamp(binpkgname, timestamp, suite, architecture):
    """Return the version of binary package *binpkgname* in *suite* for
    *architecture* at the given snapshot *timestamp*, or None if absent.

    Downloads and parses the Packages.xz index of the snapshot.
    """
    ts = timestamp.strftime("%Y%m%dT%H%M%SZ")
    url = (
        f"http://snapshot.debian.org/archive/debian/{ts}"
        f"/dists/{suite}/main/binary-{architecture}/Packages.xz"
    )
    response = requests.get(url, timeout=60)
    packages = lzma.decompress(response.content)
    for stanza in debian.deb822.Packages.iter_paragraphs(io.BytesIO(packages)):
        if stanza["Package"] == binpkgname:
            return debian.debian_support.Version(stanza["Version"])
    return None
|
|
|
|
|
|
# This function does something similar to what this wiki page describes
|
|
# https://wiki.debian.org/BisectDebian#Finding_dates_for_specific_packages
|
|
#
|
|
# The problem with the approach on that wiki page as well as the one below in
|
|
# Python is that it relies on the first_seen entry provided by snapshot.d.o.
|
|
# This means that we do not know when a package first appeared in a given
|
|
# suite. It could've first appeared in experimental or even in Debian Ports.
|
|
#
|
|
# Also see: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=806329
|
|
def first_seen_by_pkg(packages, timestamp_begin, timestamp_end, suite, architecture):
    """Collect snapshot timestamps at which the given packages changed.

    For each entry in *packages* (a "src:" prefix selects a source package,
    otherwise a binary package is assumed), determine the version range
    spanned by *timestamp_begin* and *timestamp_end* in *suite*, then query
    the snapshot.d.o machine-readable API for every version in that range
    and gather the "first_seen" timestamps of the files in the "debian"
    archive.  Exits the program if a package cannot be found at either
    boundary timestamp.

    Returns a set of aware datetime objects.

    NOTE: first_seen reflects when a file first appeared in the archive at
    all, not when it entered *suite* -- see #806329 referenced above.
    """
    timestamps = set()
    for pkg in packages:
        logging.info("obtaining versions for %s", pkg)
        if pkg.startswith("src:"):
            # source package: strip the prefix and work on Sources indices
            pkg = pkg[4:]
            oldest_versions = srcpkgversions_by_timestamp(pkg, timestamp_begin, suite)
            if len(oldest_versions) == 0:
                logging.error(
                    "source package %s cannot be found in good timestamp", pkg
                )
                sys.exit(1)
            elif len(oldest_versions) == 1:
                oldest_version = oldest_versions.pop()
            else:
                # several versions present at once -- the oldest bounds the range
                oldest_version = min(oldest_versions)
            newest_versions = srcpkgversions_by_timestamp(pkg, timestamp_end, suite)
            if len(newest_versions) == 0:
                logging.error("source package %s cannot be found in bad timestamp", pkg)
                sys.exit(1)
            elif len(newest_versions) == 1:
                newest_version = newest_versions.pop()
            else:
                newest_version = max(newest_versions)

            # iterate all archive versions of the source package, keeping
            # only those inside [oldest_version, newest_version]
            for result in requests.get(
                f"http://snapshot.debian.org/mr/package/{pkg}/", timeout=60
            ).json()["result"]:
                if debian.debian_support.Version(result["version"]) < oldest_version:
                    continue
                if debian.debian_support.Version(result["version"]) > newest_version:
                    continue
                r = requests.get(
                    f"http://snapshot.debian.org/mr/package/{pkg}"
                    f"/{result['version']}/allfiles?fileinfo=1",
                    timeout=60,
                )
                logging.info("retrieving for: %s", result["version"])
                # flatten the per-hash fileinfo lists into a single stream
                for fileinfo in [
                    fileinfo
                    for fileinfos in r.json()["fileinfo"].values()
                    for fileinfo in fileinfos
                ]:
                    # only the main "debian" archive is of interest
                    if fileinfo["archive_name"] != "debian":
                        continue
                    timestamps.add(
                        datetime.strptime(fileinfo["first_seen"], "%Y%m%dT%H%M%S%z")
                    )
        else:
            # binary package: same idea but via the Packages indices and
            # the /mr/binary/ API endpoints
            oldest_version = binpkgversion_by_timestamp(
                pkg, timestamp_begin, suite, architecture
            )
            if oldest_version is None:
                logging.error(
                    "binary package %s cannot be found in good timestamp", pkg
                )
                sys.exit(1)
            newest_version = binpkgversion_by_timestamp(
                pkg, timestamp_end, suite, architecture
            )
            if newest_version is None:
                logging.error("binary package %s cannot be found in bad timestamp", pkg)
                sys.exit(1)
            r = requests.get(f"http://snapshot.debian.org/mr/binary/{pkg}/", timeout=60)
            for result in r.json()["result"]:
                if debian.debian_support.Version(result["version"]) < oldest_version:
                    continue
                if debian.debian_support.Version(result["version"]) > newest_version:
                    continue
                # note: r is rebound here; the outer loop already holds its
                # own iterator over the previous response, so this is safe
                r = requests.get(
                    f"http://snapshot.debian.org/mr/binary/{pkg}"
                    f"/{result['version']}/binfiles?fileinfo=1",
                    timeout=60,
                )
                logging.info("retrieving for: %s", result["version"])
                # restrict to the files built for the requested architecture
                hashes = [
                    e["hash"]
                    for e in r.json()["result"]
                    if e["architecture"] == architecture
                ]
                for fileinfo in [
                    fileinfo for h in hashes for fileinfo in r.json()["fileinfo"][h]
                ]:
                    if fileinfo["archive_name"] != "debian":
                        continue
                    timestamps.add(
                        datetime.strptime(fileinfo["first_seen"], "%Y%m%dT%H%M%S%z")
                    )
    return timestamps
|
|
|
|
|
|
def runtest_cmd(cmd, env):
    """Run *cmd* with environment *env*, capturing its combined output.

    Returns a tuple (exitcode, output_bytes) where output contains both
    stdout and stderr.  At logging level INFO or more verbose the command
    runs attached to a pty so that its output can be streamed live to
    stdout while also being recorded; otherwise the simpler
    subprocess.check_output() path is used.
    """
    ret = 0
    output = b""
    try:
        # we only use the more complex Popen method if live output is required
        # for logging levels of INFO or lower
        if logging.root.isEnabledFor(logging.INFO):
            # a pty (rather than a pipe) makes most programs line-buffer
            # their output so it arrives promptly for live display
            parent_fd, child_fd = pty.openpty()
            with subprocess.Popen(
                cmd,
                stdin=child_fd,
                stderr=child_fd,
                stdout=child_fd,
                close_fds=True,
                env=env,
            ) as process:
                buf = io.BytesIO()
                # the child holds its own copy of the slave end; close ours
                # so that reading the master end can observe EOF
                os.close(child_fd)
                while process.poll() is None:
                    # 1s timeout so that process exit is noticed even when
                    # no output is being produced
                    ready, _, _ = select.select([parent_fd], [], [], 1)
                    if parent_fd in ready:
                        try:
                            data = os.read(parent_fd, 10240)
                        except OSError:
                            # reading a pty master raises EIO once the slave
                            # side is fully closed -- treat it like EOF
                            break
                        if not data:
                            break  # EOF
                        # mirror output live and keep a copy for the caller
                        os.write(sys.stdout.fileno(), data)
                        buf.write(data)
                os.close(parent_fd)
                ret = process.wait()
                output = buf.getvalue()
        else:
            output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, env=env)
    except subprocess.CalledProcessError as e:
        # check_output() signals failure via exception; recover both the
        # exit status and whatever output was produced
        ret = e.returncode
        output = e.output
    return (ret, output)
|
|
|
|
|
|
def runtest(timestamp, staticargs, toupgrade=None, badtimestamp=None):
    """Run the user's test (or the run_bisect helper) against *timestamp*.

    Exports the DEBIAN_BISECT_* environment variables describing the
    snapshot mirror for *timestamp* to the test.  When --depends or --qemu
    was given, the run_bisect(_qemu) helper script is invoked instead of
    the user script directly; *toupgrade* together with *badtimestamp*
    additionally requests upgrading that single package to its version
    from the bad timestamp.

    Returns the (exitcode, output) tuple from runtest_cmd().
    """
    timestamp_str = timestamp.strftime("%Y%m%dT%H%M%SZ")
    goodmirror = f"http://snapshot.debian.org/archive/debian/{timestamp_str}"
    # start from a minimal environment, forwarding only DEBIAN_BISECT_* vars
    env = {k: v for k, v in os.environ.items() if k.startswith("DEBIAN_BISECT_")}
    env["DEBIAN_BISECT_EPOCH"] = str(int(timestamp.timestamp()))
    env["DEBIAN_BISECT_TIMESTAMP"] = timestamp.strftime("%Y%m%dT%H%M%SZ")
    env["PATH"] = os.environ.get("PATH", "/usr/sbin:/usr/bin:/sbin:/bin")
    # route HTTP requests through the local caching proxy
    env["http_proxy"] = f"http://127.0.0.1:{staticargs.port}/"
    if staticargs.port is not None:
        # NOTE(review): guarding only DEBIAN_BISECT_MIRROR on the port while
        # http_proxy above is set unconditionally looks suspicious -- one
        # would expect the mirror to always be exported; confirm against
        # upstream debbisect before changing
        env["DEBIAN_BISECT_MIRROR"] = goodmirror
    if staticargs.depends or staticargs.qemu:
        scriptname = "run_bisect"
        if staticargs.qemu:
            scriptname = "run_bisect_qemu"
        # first try run_bisect.sh from the directory where debbisect lives in
        # case we run this from a git clone
        run_bisect = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), scriptname + ".sh"
        )
        if not os.path.exists(run_bisect):
            run_bisect = os.path.join("/usr/share/devscripts/scripts/", scriptname)
        cmd = [run_bisect]
        # second argument: comma separated dependency list (may be empty)
        if staticargs.depends is not None:
            cmd.append(staticargs.depends)
        else:
            cmd.append("")
        cmd.extend(
            [
                staticargs.script,
                goodmirror,
                staticargs.architecture,
                staticargs.suite,
                staticargs.components,
            ]
        )
        if staticargs.qemu:
            cmd.extend([staticargs.qemu["memsize"], staticargs.qemu["disksize"]])
        if toupgrade:
            # extra arguments: the bad-timestamp mirror plus the single
            # package to upgrade to its version from there
            timestamp_str = badtimestamp.strftime("%Y%m%dT%H%M%SZ")
            cmd.extend(
                [
                    f"http://snapshot.debian.org/archive/debian/{timestamp_str}",
                    toupgrade,
                ]
            )
    else:
        # execute it directly if it's an executable file or if there are no
        # shell metacharacters
        if (
            os.access(staticargs.script, os.X_OK)
            or re.search(r"[^\w@\%+=:,.\/-]", staticargs.script, re.ASCII) is None
        ):
            cmd = [staticargs.script]
        else:
            cmd = ["sh", "-c", staticargs.script]
    return runtest_cmd(cmd, env)
|
|
|
|
|
|
def get_log_fname(timestamp, goodbad, toupgrade=None):
    """Build the log file name for a test run of *timestamp*.

    *goodbad* is "good" or "bad"; when *toupgrade* is given the name also
    carries the single package whose upgrade was being tested.
    """
    ts = timestamp.strftime("%Y%m%dT%H%M%SZ")
    if toupgrade is None:
        return f"debbisect.{ts}.log.{goodbad}"
    return f"debbisect.{ts}.{toupgrade}.log.{goodbad}"
|
|
|
|
|
|
def write_log_symlink(goodbad, output, timestamp, toupgrade=None):
    """Write *output* to the per-timestamp log file and refresh the
    debbisect.log.good / debbisect.log.bad convenience symlink so it
    points at the newest log of that kind."""
    fname = get_log_fname(timestamp, goodbad, toupgrade)
    with open(fname, "wb") as logfile:
        logfile.write(output)
    if goodbad == "good":
        link = "debbisect.log.good"
    elif goodbad == "bad":
        link = "debbisect.log.bad"
    else:
        return
    # remove any existing (possibly dangling) symlink before recreating it
    if os.path.lexists(link):
        os.unlink(link)
    os.symlink(fname, link)
|
|
|
|
|
|
def bisect(good, bad, staticargs):
    """Bisect between the *good* and *bad* snapshot timestamps.

    First re-verifies that *good* really passes and *bad* really fails the
    test (honoring cached log files unless --ignore-cached-results), then
    repeatedly tests the snapshot halfway between the two, narrowing the
    interval until no further snapshot lies strictly between them.

    Returns the final (good, bad) timestamp pair, or None when
    verification failed or partial downloads were left in the cache.
    """
    # no idea how to split this function into parts without making it
    # unreadable
    # pylint: disable=too-many-statements
    diff = bad - good
    print(f"snapshot timestamp difference: {diff / timedelta(days=1)} days")

    stepnum = 1
    starttime = datetime.now(timezone.utc)

    # estimate of remaining steps: halving the interval each step until it
    # shrinks below the typical time between dinstall runs (DINSTALLRATE),
    # plus the two verification runs
    steps = round(
        (math.log(diff.total_seconds()) - math.log(DINSTALLRATE)) / math.log(2) + 2
    )
    print(f"approximately {steps} steps left to test")
    # verify that the good timestamp is really good and the bad timestamp is really bad
    # we try the bad timestamp first to make sure that the problem exists
    if not staticargs.ignore_cached_results and os.path.exists(
        get_log_fname(good, "good")
    ):
        print(f"#{stepnum}: using cached results from {get_log_fname(good, 'good')}")
    else:
        print(f"#{stepnum}: trying known good {format_timestamp(good)}...")
        ret, output = runtest(good, staticargs)
        if ret != 0:
            write_log_symlink("bad", output, good)
            print(
                "good timestamp was actually bad -- see debbisect.log.bad for details"
            )
            return None
        write_log_symlink("good", output, good)
        # leftover .part files indicate an interrupted download; results
        # based on a broken cache would be unreliable
        if not staticargs.nocache and next(
            pathlib.Path(staticargs.cache).glob("**/*.part"), False
        ):
            print("partial files left in cache after test")
            return None
    stepnum += 1
    steps = round(
        (math.log(diff.total_seconds()) - math.log(DINSTALLRATE)) / math.log(2) + 1
    )
    # extrapolate remaining wall-clock time from the average step duration
    timeleft = steps * (datetime.now(timezone.utc) - starttime) / (stepnum - 1)
    print(f"computation time left: {timeleft}")
    print(f"approximately {steps} steps left to test")
    if not staticargs.ignore_cached_results and os.path.exists(
        get_log_fname(bad, "bad")
    ):
        print(f"#{stepnum}: using cached results from {get_log_fname(bad, 'bad')}")
    else:
        print(f"#{stepnum}: trying known bad {format_timestamp(bad)}...")
        ret, output = runtest(bad, staticargs)
        if ret == 0:
            write_log_symlink("good", output, bad)
            print(
                "bad timestamp was actually good -- see debbisect.log.good for details"
            )
            return None
        write_log_symlink("bad", output, bad)
        if not staticargs.nocache and next(
            pathlib.Path(staticargs.cache).glob("**/*.part"), False
        ):
            print("partial files left in cache after test")
            return None
    stepnum += 1

    while True:
        diff = bad - good
        # One may be tempted to try and optimize this step by finding all the
        # packages that differ between the two timestamps and then finding
        # all the snapshot timestamps where the involved packages changed
        # in their version. But since dependencies can arbitrarily change
        # between two given timestamps, drawing in more packages or requiring
        # less packages, the only reliable method is really to strictly bisect
        # by taking the timestamp exactly between the two and not involve
        # other guessing magic.
        newts = sanitize_timestamp(good + diff / 2)
        if newts in [good, bad]:
            # If the middle timestamp mapped onto good or bad, then the
            # timestamps are very close to each other. Test if there is maybe
            # not another one between them by sanitizing the timestamp one
            # second before the bad one
            newts = sanitize_timestamp(bad - timedelta(seconds=1))
            if newts == good:
                break
        print(f"snapshot timestamp difference: {diff / timedelta(days=1)} days")
        steps = round(
            (math.log(diff.total_seconds()) - math.log(DINSTALLRATE)) / math.log(2) + 0
        )
        timeleft = steps * (datetime.now(timezone.utc) - starttime) / (stepnum - 1)
        print(f"computation time left: {timeleft}")
        print(f"approximately {steps} steps left to test")
        # reuse a cached verdict for this midpoint if one exists
        if not staticargs.ignore_cached_results and os.path.exists(
            get_log_fname(newts, "good")
        ):
            print(
                f"#{stepnum}: using cached result (was good)"
                f" from {get_log_fname(newts, 'good')}"
            )
            good = newts
        elif not staticargs.ignore_cached_results and os.path.exists(
            get_log_fname(newts, "bad")
        ):
            print(
                f"#{stepnum}: using cached result (was bad)"
                f" from {get_log_fname(newts, 'bad')}"
            )
            bad = newts
        else:
            print(f"#{stepnum}: trying {format_timestamp(newts)}...")
            ret, output = runtest(newts, staticargs)
            if ret == 0:
                print("test script output: good")
                write_log_symlink("good", output, newts)
                good = newts
            else:
                print("test script output: bad")
                write_log_symlink("bad", output, newts)
                bad = newts
            if not staticargs.nocache and next(
                pathlib.Path(staticargs.cache).glob("**/*.part"), False
            ):
                print("partial files left in cache after test")
                return None
        stepnum += 1
    return good, bad
|
|
|
|
|
|
def datetimestr(val):
    """argparse type: convert a date/time string into an aware datetime.

    Accepts the literal words "now" and "today", the snapshot.debian.org
    timestamp format, several ISO 8601 variants, RFC 2822 (the format of
    debian/changelog) and finally -- if the respective optional modules
    are installed -- anything understood by dateutil or parsedatetime.
    Timestamps without an explicit timezone are interpreted as local
    time.  Raises argparse.ArgumentTypeError when nothing matches.
    """
    if val == "now":
        return datetime.now(timezone.utc)
    if val == "today":
        return datetime.combine(date.today(), time(0, 0, 0, 0), timezone.utc)

    # since py3 we don't need pytz to figure out the local timezone
    localtz = datetime.now(timezone.utc).astimezone().tzinfo

    def assume_local(dt):
        # attach the local timezone if the parsed value carried none
        return dt.replace(tzinfo=localtz) if dt.tzinfo is None else dt

    # known fixed formats first: snapshot.debian.org style, then ISO 8601
    # with or without time, seconds and timezone
    known_formats = (
        "%Y%m%dT%H%M%SZ",
        "%Y-%m-%d",
        "%Y-%m-%dT%H:%M",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M:%S%z",
    )
    for fmt in known_formats:
        try:
            return assume_local(datetime.strptime(val, fmt))
        except ValueError:
            pass

    # the debian/changelog datetime format as specified by RFC 2822;
    # strptime() is unsuitable because it honors the locale while RFC 2822
    # requires the English day and month abbreviations
    try:
        return email.utils.parsedate_to_datetime(val)
    except TypeError:
        pass

    # next, try parsing using dateutil.parser
    if HAVE_DATEUTIL:
        try:
            return assume_local(dateutil.parser.parse(val))
        except ValueError:
            pass

    # finally, freeform formats like "two weeks ago"
    if HAVE_PARSEDATETIME:
        dt, parsed = parsedatetime.Calendar().parseDT(val)
        if parsed != 0:
            return assume_local(dt)

    if not HAVE_DATEUTIL:
        logging.info("parsing date failed -- maybe install python3-dateutil")
    if not HAVE_PARSEDATETIME:
        logging.info("parsing date failed -- maybe install python3-parsedatetime")

    # all failed, we cannot parse this
    raise argparse.ArgumentTypeError(f"not a valid datetime: {val}")
|
|
|
|
|
|
def qemuarg(val):
    """argparse type: parse the --qemu option value into a settings dict.

    The value is a comma separated list of key=value pairs (valid keys:
    memsize, disksize) or the word "defaults" to reset both sizes to
    their default values (memsize=1G, disksize=4G).  Raises
    argparse.ArgumentTypeError on malformed input.
    """
    defaults = {"memsize": "1G", "disksize": "4G"}
    settings = dict(defaults)
    for item in val.split(","):
        # we use startswith() so that "defaults" can also be abbreviated (even
        # down to the empty string)
        if "defaults".startswith(item):
            settings.update(defaults)
            continue
        try:
            key, size = item.split("=", maxsplit=1)
        except ValueError as err:
            raise argparse.ArgumentTypeError(f"no key=val pair: {item}") from err
        if key not in ("memsize", "disksize"):
            raise argparse.ArgumentTypeError(f"unknown key: {key}")
        # an integer byte count, optionally with an SI/IEC unit suffix
        if not re.fullmatch(r"\d+((k|K|M|G|T|P|E|Z|Y)(iB|B)?)?", size):
            raise argparse.ArgumentTypeError(f"cannot parse size value: {size}")
        settings[key] = size
    return settings
|
|
|
|
|
|
def scriptarg(val):
    """argparse type for the test script argument.

    The value is passed through unchanged; a warning is logged when it
    names an existing file that is not executable (it would later be run
    via "sh -c" instead of directly).
    """
    exists_but_not_executable = os.path.exists(val) and not os.access(val, os.X_OK)
    if exists_but_not_executable:
        logging.warning("script %s is a file but not executable", val)
    return val
|
|
|
|
|
|
def read_pkglist(infile):
    """Parse a package list file into a {package: version} dict.

    Each line of *infile* contains a package name and a version separated
    by a single tab, as written by the run_bisect helper scripts.
    """
    with open(infile, encoding="utf8") as fileobj:
        fields = (line.split("\t") for line in fileobj)
        return {pkg: version.strip() for pkg, version in fields}
|
|
|
|
|
|
def upgrade_single_package(toupgrade, goodpkgs, badpkgs, good, bad, staticargs):
    """Test whether upgrading/installing only *toupgrade* triggers the bug.

    Re-runs the test on the last *good* timestamp while upgrading just
    *toupgrade* to its version from the first *bad* timestamp, honoring
    cached per-package log and pkglist files unless
    --ignore-cached-results was given.  When the single upgrade does
    trigger the problem, the packages that were dragged in alongside it
    (dependencies) are listed as well, since any of them could be the real
    culprit.

    *goodpkgs* and *badpkgs* map package names to versions at the two
    boundary timestamps.
    """
    if toupgrade in goodpkgs:
        print(
            f"test upgrading {toupgrade} {goodpkgs[toupgrade]}"
            f" -> {badpkgs[toupgrade]}..."
        )
    else:
        # the package did not exist at the good timestamp at all
        print(f"test installing {toupgrade} {badpkgs[toupgrade]}...")
    # package list written by the helper script for this single-package run
    newbadpkgpath = f"./debbisect.{good.strftime('%Y%m%dT%H%M%SZ')}.{toupgrade}.pkglist"
    if (
        not staticargs.ignore_cached_results
        and os.path.exists(newbadpkgpath)
        and os.path.exists(get_log_fname(good, "good", toupgrade))
    ):
        # cached verdict: the upgrade did NOT trigger the problem
        print(
            f"using cached result (was good)"
            f" from {get_log_fname(good, 'good', toupgrade)}"
        )
        if toupgrade in goodpkgs:
            print(f" upgrading {toupgrade} does not cause the problem")
        else:
            print(f" installing {toupgrade} does not cause the problem")
        return
    if (
        not staticargs.ignore_cached_results
        and os.path.exists(newbadpkgpath)
        and os.path.exists(get_log_fname(good, "bad", toupgrade))
    ):
        # cached verdict: the upgrade DID trigger the problem
        print(
            f"using cached result (was bad)"
            f" from {get_log_fname(good, 'bad', toupgrade)}"
        )
        print(f" upgrading {toupgrade} triggered the problem")
    else:
        ret, output = runtest(good, staticargs, toupgrade, bad)
        if ret == 0:
            write_log_symlink("good", output, good, toupgrade)
            if toupgrade in goodpkgs:
                print(f" upgrading {toupgrade} does not cause the problem")
            else:
                print(f" installing {toupgrade} does not cause the problem")
            return
        write_log_symlink("bad", output, good, toupgrade)
        print(f" upgrading {toupgrade} triggered the problem")
    # this package introduced the regression check if more than
    # just the package in question got upgraded
    newbadpkgs = read_pkglist(newbadpkgpath)
    # find all packages that are completely new or of a
    # different version than those in the last good test
    newupgraded = []
    for pkg, version in newbadpkgs.items():
        if pkg not in goodpkgs or version != goodpkgs[pkg]:
            newupgraded.append(pkg)
    if not newupgraded:
        logging.error(
            "no difference in installed packages -- something else must've changed"
        )
        return

    if len(newupgraded) == 1:
        # the only upgraded package should be the one that was
        # requested to be upgraded
        assert newupgraded[0] == toupgrade

    print(" additional packages that got upgraded/installed at the same time:")
    for newtoupgrade in newupgraded:
        if newtoupgrade == toupgrade:
            continue
        print(
            f" {newtoupgrade} {goodpkgs.get(newtoupgrade, '(n.a.)')}"
            f" -> {newbadpkgs[newtoupgrade]}"
        )
|
|
|
|
|
|
def parseargs():
    """Parse the debbisect command line into an argparse.Namespace.

    Positional arguments are the good timestamp, the bad timestamp and
    the test script; the various options control caching, the chroot/qemu
    helper modes and the post-bisection per-package search.
    """
    progname = sys.argv[0]
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=f"""\

Execute a script or a shell snippet for a known good timestamp and a known bad
timestamp and then bisect the timestamps until a timestamp from
snapshot.debian.org is found where the script first fails. Environment
variables are used to tell the script which timestamp to test. See ENVIRONMENT
VARIABLES below. At the end of the execution, the files debbisect.log.good and
debbisect.log.bad are the log files of the last good and last bad run,
respectively. By default, a temporary caching proxy is executed to reduce
bandwidth usage on snapshot.debian.org. If you plan to run debbisect multiple
times on a similar range of timestamps, consider setting a non-temporary cache
directory with the --cache option.

The program has three basic modes of operation. In the first, the given script
is responsible to set up everything as needed:

$ {progname} "last week" today script.sh
$ diff -u debbisect.log.good debbisect.log.bad

If also the --depends option is given, then a chroot of the correct timestamp
will be created each time and the script will receive as first argument the
path to that chroot. Additionally, this mode allows debbisect to figure out the
exact package that was responsible for the failure instead of only presenting
you the last good and first bad timestamp.

Lastly, you can also provide the --qemu option. In this mode, your test will be
create a qemu virtual machine of the correct timestamp each time. The script
will receive the correct ssh config to log into a host named qemu and execute
arbitrary commands.

""",
        epilog=f"""\

*EXAMPLES*

Just run "do_something" which runs the test and returns a non-zero exit on
failure.

$ {progname} "last week" today "mmdebstrap --customize-hook\
='chroot \\"\\$1\\" do_something' unstable - \\$DEBIAN_BISECT_MIRROR >/dev/null"
$ diff -u debbisect.log.good debbisect.log.bad

Since the command can easily become very long and quoting very involved, lets
instead use a script:

$ cat << END > script.sh
> #!/bin/sh
> set -exu
> mmdebstrap \\
> --verbose \\
> --aptopt='Acquire::Check-Valid-Until "false"' \\
> --variant=apt \\
> --include=pkga,pkgb,pkgc \\
> --customize-hook='chroot "$1" dpkg -l' \\
> --customize-hook='chroot "$1" do_something' \\
> unstable \\
> - \\
> $DEBIAN_BISECT_MIRROR \\
> >/dev/null
> END
$ chmod +x script.sh
$ {progname} --verbose --cache=./cache "two years ago" yesterday ./script.sh
$ diff -u debbisect.log.good debbisect.log.bad
$ rm -r ./cache

This example sets Acquire::Check-Valid-Until to not fail on snapshot timestamps
from "two years ago", uses the "apt" variant (only Essential:yes plus apt),
installs the packages required for the test using --include, runs "dpkg -l" so
that we can see which packages differed in the logs at the end and uses
--cache=cache so that the apt cache does not get discarded at the end and the
command can be re-run without downloading everything from snapshot.debian.org
again.

If you want to build a source package you can use the script shipped by
devscripts as /usr/share/doc/devscripts/examples/debbisect_buildsrc.sh and
either use it unmodified like this:

$ DEBIAN_BISECT_SRCPKG=mysrc {progname} "two years ago" yesterday \
/usr/share/doc/devscripts/examples/debbisect_buildsrc.sh

or use the script as a starting point to do your own custom builds.

Once debbisect has finished bisecting and figured out the last good and the
first bad timestamp, there might be more than one package that differs in
version between these two timestamps. debbisect can figure out which package is
the culprit if you hand it control over installing dependencies for you via the
--depends option. With that option active, the script will not be responsible
to set up a chroot itself but is given the path to an existing chroot as the
first argument. Here is a real example that verifies the package responsible
for Debian bug #912935:

$ {progname} --depends=botch "2018-11-17" "2018-11-22" \
'chroot "$1" botch-dose2html --packages=/dev/null --help'
[...]
test upgrading python3-minimal 3.6.7-1 -> 3.7.1-2...
upgrading python3-minimal triggered the problem

If you want to run above test under qemu, then you would run:

$ {progname} --depends=botch --qemu=defaults "2018-11-17" "2018-11-22" \
'ssh -F "$1" qemu botch-dose2html --packages=/dev/null --help'

In the last two examples we omitted the --cache argument for brevity. But
please make use of it to reduce the load on snapshot.debian.org.

*TIMESTAMPS*

Valid good and bad timestamp formats are either:

> the format used by snapshot.debian.org
> ISO 8601 (with or without time, seconds and timezone)
> RFC 2822 (used in debian/changelog)
> all formats understood by the Python dateutil module (if installed)
> all formats understood by the Python parsedatetime module (if installed)

Without specifying the timezone explicitly, the local offset is used.

Examples (corresponding to the items in above list, respectively):

> 20200313T065326Z
> 2020-03-13T06:53:26+00:00
> Fri, 29 Nov 2019 14:00:08 +0100
> 5:50 A.M. on June 13, 1990
> two weeks ago

The earliest timestamp that works with debbisect should be 2006-08-10.

*ENVIRONMENT VARIABLES*

The following environment variables are available to the test script:

DEBIAN_BISECT_MIRROR Contains the snapshot.d.o mirror address including the
current timestamp. Can also be generated from
DEBIAN_BISECT_TIMESTAMP as
http://snapshot.debian.org/archive/debian/$DEBIAN_BISECT_TIMESTAMP

DEBIAN_BISECT_EPOCH Contains an integer representing the unix epoch of the
current timestamp. The value of this variable can
directly be assigned to SOURCE_DATE_EPOCH.

DEBIAN_BISECT_TIMESTAMP Contains a timestamp in the format used by
snapshot.debian.org. Can also be generated from
DEBIAN_BISECT_EPOCH via:
date --utc --date=@$DEBIAN_BISECT_EPOCH +%Y%m%dT%H%M%SZ

DEBIAN_BISECT_* All environment variables starting with DEBIAN_BISECT_
are passed to the test script.

http_proxy The address of the apt http caching proxy. Unset the
variable in your script if you want to disable the
cache.

Written by Johannes Schauer Marin Rodrigues <josch@debian.org>
""",
    )
    # logging verbosity: --debug > --verbose > default (warnings only)
    parser.add_argument(
        "-d",
        "--debug",
        help="Print lots of debugging statements",
        action="store_const",
        dest="loglevel",
        const=logging.DEBUG,
        default=logging.WARNING,
    )
    parser.add_argument(
        "-v",
        "--verbose",
        help="Be verbose",
        action="store_const",
        dest="loglevel",
        const=logging.INFO,
    )
    parser.add_argument(
        "--cache", help="cache directory -- by default $TMPDIR is used", type=str
    )
    parser.add_argument("--nocache", help="disable cache", action="store_true")
    parser.add_argument(
        "--port",
        help="manually choose port number for the apt cache instead of "
        "automatically choosing a free port",
        type=int,
        default=0,
    )
    parser.add_argument(
        "--depends",
        help="Comma separated list of binary packages the test script "
        "requires. With this option, the test script will run inside a "
        "chroot with the requested packages installed.",
        type=str,
    )
    parser.add_argument(
        "--qemu",
        help="Create qemu virtual machine and pass a ssh config file to the "
        "test script. This argument takes a comma-separated series of "
        "key=value pairs to specify the virtual machine memory size (via "
        "memsize) and the virtual machine disksize (via disksize). Sizes "
        "are measured in bytes or with common unit suffixes like M or G. "
        "To pick the default values (disksize=4G,memsize=1G) the shorthand "
        "'defaults' can be passed.",
        type=qemuarg,
    )
    parser.add_argument(
        "--architecture",
        help="Chosen architecture when creating the chroot with --depends or "
        "--qemu (default: native architecture)",
        # NOTE(review): check_output() returns bytes, so this default is a
        # bytes object while user-supplied values are str -- confirm against
        # upstream whether a .decode() is missing here
        default=subprocess.check_output(["dpkg", "--print-architecture"]).rstrip(),
        type=str,
    )
    parser.add_argument(
        "--suite",
        help="Chosen suite when creating the chroot with --depends or --qemu "
        "(default: unstable)",
        default="unstable",
        type=str,
    )
    parser.add_argument(
        "--components",
        help="Chosen components (separated by comma or whitespace) when "
        "creating the chroot with --depends or --qemu (default: main)",
        default="main",
        type=str,
    )
    parser.add_argument(
        "--no-find-exact-package",
        help="Normally, when the --depends argument is given so that "
        "debbisect takes care of managing dependencies, the precise package "
        "that introduced the problem is determined after bisection by "
        "installing the packages that differ between the last good and "
        "first bad timestamp one by one. This option disables this feature.",
        action="store_true",
    )
    parser.add_argument(
        "--ignore-cached-results",
        help="Perform a run for a timestamp even if a log file for it exists "
        "in the current directory",
        action="store_true",
    )
    parser.add_argument(
        "good",
        type=datetimestr,
        help="good timestamp -- see section TIMESTAMPS for valid formats",
    )
    parser.add_argument(
        "bad",
        type=datetimestr,
        help="bad timestamp -- see section TIMESTAMPS for valid formats",
    )
    parser.add_argument(
        "script",
        type=scriptarg,
        help="test script -- can either be a shell code snippet or an "
        "executable script. A non zero exit code indicates failure. "
        "When also --depends is used, then the first argument to the "
        "script will be the chroot directory. When --qemu is used, then "
        "the first argument to the script will an ssh config for a host "
        "named qemu.",
    )
    return parser.parse_args()
|
|
|
|
|
|
def find_exact_package(good, bad, staticargs, depends, no_find_exact_package):
    """Report which packages differ between the last good and first bad run.

    good/bad are the datetime objects of the last good and first bad
    snapshot timestamps; the corresponding
    ./debbisect.<timestamp>.pkglist files written by earlier test runs
    must exist.  If debbisect managed the dependencies itself (depends is
    set) and --no-find-exact-package was not given, each differing
    package is additionally upgraded on its own to pinpoint the exact
    package that introduced the breakage.

    Exits with status 1 if a pkglist file is missing or (impossibly) no
    difference between the two package lists is found.
    """
    goodpkglist = f"./debbisect.{good.strftime('%Y%m%dT%H%M%SZ')}.pkglist"
    if not os.path.exists(goodpkglist):
        logging.error("%s doesn't exist", goodpkglist)
        sys.exit(1)
    badpkglist = f"./debbisect.{bad.strftime('%Y%m%dT%H%M%SZ')}.pkglist"
    if not os.path.exists(badpkglist):
        logging.error("%s doesn't exist", badpkglist)
        sys.exit(1)

    # Create a set of packages for which either the version in the last good
    # and first bad run differs or which only exist in the first bad run.
    goodpkgs = read_pkglist(goodpkglist)
    badpkgs = read_pkglist(badpkglist)
    # A single pass over the bad package list suffices: a package present in
    # both runs with differing versions already satisfies
    # "version != goodpkgs[pkg]", so a separate good->bad comparison loop
    # would be redundant.  Packages that only exist in the good run were
    # removed, not upgraded, and are deliberately ignored -- this also
    # guarantees that badpkgs[pkg] below can never raise KeyError.
    upgraded = sorted(
        pkg
        for pkg, version in badpkgs.items()
        if pkg not in goodpkgs or version != goodpkgs[pkg]
    )
    if not upgraded:
        logging.error("no difference -- this should never happen")
        sys.exit(1)
    elif len(upgraded) == 1:
        print(
            f"only one package differs: {upgraded[0]}"
            f" {goodpkgs.get(upgraded[0], '(n.a.)')} -> {badpkgs[upgraded[0]]}"
        )
    else:
        print(
            "the following packages differ between the last good and "
            "first bad timestamp:"
        )
        for toupgrade in upgraded:
            print(
                f"  {toupgrade} {goodpkgs.get(toupgrade, '(n.a.)')}"
                f" -> {badpkgs[toupgrade]}"
            )

    # if debbisect was tasked with handling dependencies itself, try to
    # figure out the exact package that introduced the break
    if depends and not no_find_exact_package:
        for toupgrade in upgraded:
            upgrade_single_package(
                toupgrade, goodpkgs, badpkgs, good, bad, staticargs
            )
|
|
|
|
|
|
def ensure_mmdebstrap_version(reqver: str) -> bool:
    """Return True iff the installed mmdebstrap is at least version reqver.

    Runs "mmdebstrap --version" and compares the reported version against
    reqver using Debian version-comparison semantics.  Exits with status 1
    if mmdebstrap is not installed or --version returns non-zero.
    """
    try:
        version = subprocess.check_output(
            ["mmdebstrap", "--version"], stderr=subprocess.DEVNULL
        )
    except subprocess.CalledProcessError:
        print("running mmdebstrap --version resulted in non-zero exit")
        sys.exit(1)
    except FileNotFoundError:
        print("you need to install mmdebstrap")
        sys.exit(1)

    # --version output ends with a newline; strip it before parsing so the
    # trailing whitespace does not end up in the version string handed to
    # debian.debian_support.Version.
    version = version.decode().strip().removeprefix("mmdebstrap ")

    return debian.debian_support.Version(version) >= debian.debian_support.Version(
        reqver
    )
|
|
|
|
|
|
def main():
    """Command-line entry point: parse arguments, bisect between the good
    and bad timestamps, and report (and optionally pinpoint) the packages
    that differ."""
    args = parseargs()

    logging.basicConfig(level=args.loglevel)

    # Both user-supplied timestamps are remapped to timestamps that
    # snapshot.d.o actually serves; tell the user when that changed them.
    good = sanitize_timestamp(args.good)
    if good != args.good:
        print(
            f"good timestamp {format_timestamp(args.good)} was remapped to"
            f" snapshot.d.o timestamp {format_timestamp(good)}"
        )
    bad = sanitize_timestamp(args.bad)
    if bad != args.bad:
        print(
            f"bad timestamp {format_timestamp(args.bad)} was remapped to"
            f" snapshot.d.o timestamp {format_timestamp(bad)}"
        )

    if good > bad:
        print("good is later than bad")
        sys.exit(1)

    # check if mmdebstrap is installed and at least 1.3.0
    if (args.depends or args.qemu) and not ensure_mmdebstrap_version("1.3.0"):
        print("you need at least mmdebstrap version 1.3.0")
        sys.exit(1)

    # Optionally start the caching proxy; its teardown is registered with
    # atexit so the cache is shut down however main() exits.
    port = None
    if not args.nocache:
        port, teardown = setupcache(args.cache, args.port)
        atexit.register(teardown)

    # NOTE: staticargs is the namedtuple *class*, not an instance; the
    # setattr loop below stores the values as class attributes, using the
    # type as a plain attribute namespace for the subset of args that the
    # test helpers need.
    staticargs = collections.namedtuple(
        "args",
        [
            "script",
            "port",
            "depends",
            "architecture",
            "suite",
            "components",
            "qemu",
            "ignore_cached_results",
            "cache",
            "nocache",
        ],
    )
    for a in staticargs._fields:
        setattr(staticargs, a, getattr(args, a))
    # override args.port with the (possibly auto-assigned) cache port
    staticargs.port = port
    if good == bad:
        # test only single timestamp
        print(f"trying single timestamp {format_timestamp(good)}...")
        # Reuse a cached good/bad log for this timestamp unless the user
        # asked to ignore cached results; otherwise run the test script and
        # record the outcome.  The process exit code mirrors the test result.
        if not staticargs.ignore_cached_results and os.path.exists(
            get_log_fname(good, "good")
        ):
            print(f"using cached result (was good) from {get_log_fname(good, 'good')}")
            ret = 0
        elif not staticargs.ignore_cached_results and os.path.exists(
            get_log_fname(good, "bad")
        ):
            print(f"using cached result (was bad) from {get_log_fname(good, 'bad')}")
            ret = 1
        else:
            ret, output = runtest(good, staticargs)
            if ret == 0:
                print("test script output: good")
                write_log_symlink("good", output, good)
            else:
                print("test script output: bad")
                write_log_symlink("bad", output, good)
        sys.exit(ret)
    # Normal two-timestamp mode: bisect the range, then report the final
    # boundary pair and the package-level difference between them.
    res = bisect(good, bad, staticargs)
    if res is not None:
        good, bad = res
        print("bisection finished successfully")
        print(f"  last good timestamp: {format_timestamp(good)}")
        print(f"  first bad timestamp: {format_timestamp(bad)}")

        find_exact_package(
            good, bad, staticargs, args.depends, args.no_find_exact_package
        )
|
|
|
|
|
|
# Run the entry point only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|