diff options
Diffstat (limited to 'scrub')
-rw-r--r-- | scrub/Makefile.in | 188 | ||||
-rw-r--r-- | scrub/e2scrub.8.in | 69 | ||||
-rw-r--r-- | scrub/e2scrub.conf.in | 25 | ||||
-rw-r--r-- | scrub/e2scrub.in | 283 | ||||
-rw-r--r-- | scrub/e2scrub.rules.in | 2 | ||||
-rw-r--r-- | scrub/e2scrub@.service.in | 20 | ||||
-rw-r--r-- | scrub/e2scrub_all.8.in | 47 | ||||
-rw-r--r-- | scrub/e2scrub_all.cron.in | 2 | ||||
-rw-r--r-- | scrub/e2scrub_all.in | 185 | ||||
-rw-r--r-- | scrub/e2scrub_all.service.in | 12 | ||||
-rw-r--r-- | scrub/e2scrub_all.timer.in | 11 | ||||
-rw-r--r-- | scrub/e2scrub_all_cron.in | 69 | ||||
-rw-r--r-- | scrub/e2scrub_fail.in | 38 | ||||
-rw-r--r-- | scrub/e2scrub_fail@.service.in | 10 | ||||
-rw-r--r-- | scrub/e2scrub_reap.service.in | 25 |
15 files changed, 986 insertions, 0 deletions
diff --git a/scrub/Makefile.in b/scrub/Makefile.in new file mode 100644 index 0000000..387f650 --- /dev/null +++ b/scrub/Makefile.in @@ -0,0 +1,188 @@ +# +# Makefile for e2scrub +# + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +top_builddir = .. +my_dir = scrub +INSTALL = @INSTALL@ +MKDIR_P = @MKDIR_P@ + +@MCONFIG@ + +PROGS= e2scrub e2scrub_all +MANPAGES= e2scrub.8 e2scrub_all.8 +CONFFILES= e2scrub.conf + +ifeq ($(HAVE_UDEV),yes) +UDEV_RULES = e2scrub.rules +INSTALLDIRS_TGT += installdirs-udev +INSTALL_TGT += install-udev +UNINSTALL_TGT += uninstall-udev +endif + +ifeq ($(HAVE_CROND),yes) +CRONTABS = e2scrub_all.cron +LIBPROGS += e2scrub_all_cron +INSTALLDIRS_TGT += installdirs-crond installdirs-libprogs +INSTALL_TGT += install-crond install-libprogs +UNINSTALL_TGT += uninstall-crond uninstall-libprogs +endif + +ifeq ($(HAVE_SYSTEMD),yes) +SERVICE_FILES = e2scrub@.service e2scrub_all.service e2scrub_all.timer e2scrub_fail@.service e2scrub_reap.service +LIBPROGS += e2scrub_fail +INSTALLDIRS_TGT += installdirs-systemd installdirs-libprogs +INSTALL_TGT += install-systemd install-libprogs +UNINSTALL_TGT += uninstall-systemd uninstall-libprogs +endif + +all:: $(PROGS) $(MANPAGES) $(CONFFILES) $(UDEV_RULES) $(SERVICE_FILES) $(CRONTABS) $(LIBPROGS) + +# Every script below is generated by $(SUBSTITUTE_UPTIME), so each rule lists +# $(DEP_SUBSTITUTE) as a prerequisite, matching the pattern rules further down +# (presumably the substitution tool and its inputs, provided via @MCONFIG@). +e2scrub: $(DEP_SUBSTITUTE) e2scrub.in + $(E) " SUBST $@" + $(Q) $(SUBSTITUTE_UPTIME) $(srcdir)/e2scrub.in $@ + $(Q) chmod a+x $@ + +e2scrub_all: $(DEP_SUBSTITUTE) e2scrub_all.in + $(E) " SUBST $@" + $(Q) $(SUBSTITUTE_UPTIME) $(srcdir)/e2scrub_all.in $@ + $(Q) chmod a+x $@ + +e2scrub_fail: $(DEP_SUBSTITUTE) e2scrub_fail.in + $(E) " SUBST $@" + $(Q) $(SUBSTITUTE_UPTIME) $(srcdir)/e2scrub_fail.in $@ + $(Q) chmod a+x $@ + +e2scrub_all_cron: $(DEP_SUBSTITUTE) e2scrub_all_cron.in + $(E) " SUBST $@" + $(Q) $(SUBSTITUTE_UPTIME) $(srcdir)/e2scrub_all_cron.in $@ + $(Q) chmod a+x $@ + +%.8: %.8.in $(DEP_SUBSTITUTE) + $(E) " SUBST $@" + $(Q) $(SUBSTITUTE_UPTIME) $< $@ + +%.conf: %.conf.in $(DEP_SUBSTITUTE) + $(E) " SUBST $@" + $(Q) $(SUBSTITUTE_UPTIME) $< $@ + +%.rules: 
%.rules.in $(DEP_SUBSTITUTE) + $(E) " SUBST $@" + $(Q) $(SUBSTITUTE_UPTIME) $< $@ + +%.service: %.service.in $(DEP_SUBSTITUTE) + $(E) " SUBST $@" + $(Q) $(SUBSTITUTE_UPTIME) $< $@ + +%.cron: %.cron.in $(DEP_SUBSTITUTE) + $(E) " SUBST $@" + $(Q) $(SUBSTITUTE_UPTIME) $< $@ + +%.timer: %.timer.in $(DEP_SUBSTITUTE) + $(E) " SUBST $@" + $(Q) $(SUBSTITUTE_UPTIME) $< $@ + +installdirs-udev: + $(E) " MKDIR_P $(UDEV_RULES_DIR)" + $(Q) $(MKDIR_P) $(DESTDIR)$(UDEV_RULES_DIR) + +installdirs-crond: + $(E) " MKDIR_P $(CROND_DIR)" + $(Q) $(MKDIR_P) $(DESTDIR)$(CROND_DIR) + +installdirs-libprogs: + $(E) " MKDIR_P $(pkglibdir)" + $(Q) $(MKDIR_P) $(DESTDIR)$(pkglibdir) + +installdirs-systemd: + $(E) " MKDIR_P $(SYSTEMD_SYSTEM_UNIT_DIR)" + $(Q) $(MKDIR_P) $(DESTDIR)$(SYSTEMD_SYSTEM_UNIT_DIR) + +installdirs: $(INSTALLDIRS_TGT) + $(E) " MKDIR_P $(root_sbindir) $(man8dir) $(root_sysconfdir)" + $(Q) $(MKDIR_P) $(DESTDIR)$(root_sbindir) \ + $(DESTDIR)$(man8dir) $(DESTDIR)$(root_sysconfdir) + +install-udev: installdirs-udev + $(Q) for i in $(UDEV_RULES); do \ + $(ES) " INSTALL $(UDEV_RULES_DIR)/$$i"; \ + $(INSTALL_DATA) $$i $(DESTDIR)$(UDEV_RULES_DIR)/96-$$i; \ + done + +install-crond: installdirs-crond + $(Q) if test -n "$(CRONTABS)" ; then \ + $(ES) " INSTALL $(CROND_DIR)/e2scrub_all" ; \ + $(INSTALL_DATA) e2scrub_all.cron $(DESTDIR)$(CROND_DIR)/e2scrub_all ; \ + fi + +install-libprogs: $(LIBPROGS) installdirs-libprogs + $(Q) for i in $(LIBPROGS); do \ + $(ES) " INSTALL $(pkglibdir)/$$i"; \ + $(INSTALL_PROGRAM) $$i $(DESTDIR)$(pkglibdir)/$$i; \ + done + +install-systemd: $(SERVICE_FILES) installdirs-systemd + $(Q) for i in $(SERVICE_FILES); do \ + $(ES) " INSTALL_DATA $(SYSTEMD_SYSTEM_UNIT_DIR)/$$i"; \ + $(INSTALL_DATA) $$i $(DESTDIR)$(SYSTEMD_SYSTEM_UNIT_DIR)/$$i; \ + done + +install-strip: install + +install: $(PROGS) $(MANPAGES) $(FMANPAGES) installdirs $(INSTALL_TGT) + $(Q) for i in $(PROGS); do \ + $(ES) " INSTALL $(root_sbindir)/$$i"; \ + $(INSTALL_PROGRAM) $$i 
$(DESTDIR)$(root_sbindir)/$$i; \ + done + $(Q) for i in $(MANPAGES); do \ + for j in $(COMPRESS_EXT); do \ + $(RM) -f $(DESTDIR)$(man8dir)/$$i.$$j; \ + done; \ + $(ES) " INSTALL_DATA $(man8dir)/$$i"; \ + $(INSTALL_DATA) $$i $(DESTDIR)$(man8dir)/$$i; \ + done + $(Q) for i in $(CONFFILES); do \ + $(ES) " INSTALL_DATA $(root_sysconfdir)/$$i"; \ + $(INSTALL_DATA) $$i $(DESTDIR)$(root_sysconfdir)/$$i; \ + done + +uninstall-udev: + for i in $(UDEV_RULES); do \ + $(RM) -f $(DESTDIR)$(UDEV_RULES_DIR)/96-$$i; \ + done + +uninstall-crond: + if test -n "$(CRONTABS)" ; then \ + $(RM) -f $(DESTDIR)$(CROND_DIR)/e2scrub_all ; \ + fi + +uninstall-libprogs: + for i in $(LIBPROGS); do \ + $(RM) -f $(DESTDIR)$(pkglibdir)/$$i; \ + done + +uninstall-systemd: + for i in $(SERVICE_FILES); do \ + $(RM) -f $(DESTDIR)$(SYSTEMD_SYSTEM_UNIT_DIR)/$$i; \ + done + +uninstall: $(UNINSTALL_TGT) + for i in $(PROGS); do \ + $(RM) -f $(DESTDIR)$(root_sbindir)/$$i; \ + done + for i in $(MANPAGES); do \ + $(RM) -f $(DESTDIR)$(man8dir)/$$i; \ + done + for i in $(CONFFILES); do \ + $(RM) -f $(DESTDIR)$(root_sysconfdir)/$$i; \ + done + +clean:: + $(RM) -f $(PROGS) $(MANPAGES) $(CONFFILES) $(UDEV_RULES) $(SERVICE_FILES) $(CRONTABS) $(LIBPROGS) + +mostlyclean: clean +distclean: clean + $(RM) -f .depend Makefile $(srcdir)/TAGS $(srcdir)/Makefile.in.old diff --git a/scrub/e2scrub.8.in b/scrub/e2scrub.8.in new file mode 100644 index 0000000..3d27751 --- /dev/null +++ b/scrub/e2scrub.8.in @@ -0,0 +1,69 @@ +.TH E2SCRUB 8 "@E2FSPROGS_MONTH@ @E2FSPROGS_YEAR@" "E2fsprogs version @E2FSPROGS_VERSION@" +.SH NAME +e2scrub - check the contents of a mounted ext[234] file system +.SH SYNOPSIS +.B +e2scrub [OPTION] MOUNTPOINT | DEVICE +.SH DESCRIPTION +.B e2scrub +attempts to check (but not repair) all metadata in a mounted ext[234] +file system if the file system resides on an LVM logical volume. +The block device of the LVM logical volume can also be passed in. 
+ +This program snapshots the volume and runs a file system check on the snapshot +to look for corruption errors. +The LVM volume group must have at least 256MiB of unallocated space to +dedicate to the snapshot or the logical volume will be skipped. +The snapshot will be named +.IR lvname ".e2scrub" +and +.B udev +will not create symbolic links to it under +.IR /dev/disk . +Every attempt will be made to remove the snapshots prior to running +.BR e2scrub , +but in a dire situation it may be necessary to remove the snapshot manually. + +If no errors are found, +.B fstrim +can be called on the file system if it is mounted. +If errors are found, the file system will be marked as having errors. +The file system should be taken offline and +.B e2fsck +run as soon as possible, because +.B e2scrub +does not fix corruptions. +If the file system is not repaired, +.B e2fsck +will be run before the next mount. +.SH OPTIONS +.TP +\fB-n\fR +Print what commands +.B e2scrub +would execute to check the file system. (Note: the commands will not +actually be executed; however, since +.B e2scrub +needs to run some commands to query the system to determine what +commands would be executed, it still needs to be run as root.) +.TP +\fB-r\fR +Remove the e2scrub snapshot and exit without checking anything. +.TP +\fB-t\fR +Run +.B +fstrim(1) +on the mounted file system if no errors are found. +.TP +\fB-V\fR +Print version information and exit. +.SH EXIT CODE +The exit codes are the same as in +.BR e2fsck (8) +.SH SEE ALSO +.BR e2fsck (8) +.SH AUTHOR +Darrick J. Wong <darrick.wong@oracle.com> +.SH COPYRIGHT +Copyright \[co]2018 Oracle. License is GPLv2+. 
<http://www.gnu.org/licenses/gpl-2.0.html> diff --git a/scrub/e2scrub.conf.in b/scrub/e2scrub.conf.in new file mode 100644 index 0000000..661fc13 --- /dev/null +++ b/scrub/e2scrub.conf.in @@ -0,0 +1,25 @@ +# e2scrub configuration file + +# Uncomment to enable automatic periodic runs of e2scrub_all +# (either via cron or via a systemd timer) +# periodic_e2scrub=1 + +# e-mail destination used by e2scrub_fail when problems are found with +# the file system. +# recipient=root + +# e-mail sender used by e2scrub_fail when problems are found with +# the file system. +# sender=e2scrub@host.domain.name + +# Snapshots will be created to run fsck; the snapshot will be of this size. +# snap_size_mb=256 + +# Set this to 1 to enable fstrim for everyone. +# fstrim=0 + +# Arguments passed into e2fsck. +# e2fsck_opts="-vtt" + +# Set this to 1 to have e2scrub_all scrub all LVs, not just the mounted ones. +# scrub_all=0 diff --git a/scrub/e2scrub.in b/scrub/e2scrub.in new file mode 100644 index 0000000..7ed57f2 --- /dev/null +++ b/scrub/e2scrub.in @@ -0,0 +1,283 @@ +#!/bin/bash + +# Copyright (C) 2018 Oracle. All Rights Reserved. +# +# Author: Darrick J. Wong <darrick.wong@oracle.com> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + +# Automatically check an LVM-managed filesystem online. 
+# We use lvm snapshots to do this, which means that we can only +# check filesystems in VGs that have at least 256MB (or so) of +# free space. + +PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin + +if (( $EUID != 0 )); then + echo "e2scrub must be run as root" + exit 1 +fi + +snap_size_mb=256 +fstrim=0 +reap=0 +e2fsck_opts="" +conffile="@root_sysconfdir@/e2scrub.conf" + +test -f "${conffile}" && . "${conffile}" + +print_help() { + echo "Usage: $0 [OPTIONS] mountpoint | device" + echo + echo "mountpoint must be on an LVM-managed block device" + echo "-n: Show what commands e2scrub would execute." + echo "-r: Remove e2scrub snapshot and exit, do not check anything." + echo "-t: Run fstrim if successful." + echo "-V: Print version information and exit." +} + +print_version() { + echo "e2scrub @E2FSPROGS_VERSION@ (@E2FSPROGS_DATE@)" +} + +exitcode() { + ret="$1" + + # If we're being run as a service, the return code must fit the LSB + # init script action error guidelines, which is to say that we + # compress all errors to 1 ("generic or unspecified error", LSB 5.0 + # section 22.2) and hope the admin will scan the log for what + # actually happened. + + # We have to sleep 2 seconds here because journald uses the pid to + # connect our log messages to the systemd service. This is critical + # for capturing all the log messages if the scrub fails, because the + # fail service uses the service name to gather log messages for the + # error report. + if [ -n "${SERVICE_MODE}" -a "${ret}" -ne 0 ]; then + test "${ret}" -ne 0 && ret=1 + sleep 2 + fi + + exit "${ret}" +} + +while getopts "nrtV" opt; do + case "${opt}" in + "n") DBG="echo Would execute: " ;; + "r") reap=1;; + "t") fstrim=1;; + "V") print_version; exitcode 0;; + *) print_help; exitcode 2;; + esac +done +shift "$((OPTIND - 1))" + +arg="$1" +if [ -z "${arg}" ]; then + print_help + exitcode 1 +fi + +if ! type lsblk >& /dev/null ; then + echo "e2scrub: can't find lsblk --- is util-linux installed?" 
+ exitcode 1 +fi + +if ! type lvcreate >& /dev/null ; then + echo "e2scrub: can't find lvcreate --- is lvm2 installed?" + exitcode 1 +fi + +# close file descriptor 3 (from cron) since it causes lvm to kvetch +exec 3<&- + +# Find the device for a given mountpoint +dev_from_mount() { + local mountpt="$(realpath "$1")" + + lsblk -o NAME,FSTYPE,MOUNTPOINT -p -P -n 2> /dev/null | while read vars; do + eval "${vars}" + if [ "${mountpt}" != "${MOUNTPOINT}" ]; then + continue + fi + case "${FSTYPE}" in + ext[234]) + echo "${NAME}" + return 0 + ;; + esac + done + return 1 +} + +# Check a device argument +dev_from_arg() { + local dev="$1" + local fstype="$(lsblk -o FSTYPE -n "${dev}" 2> /dev/null)" + + case "${fstype}" in + ext[234]) + echo "${dev}" + return 0 + ;; + esac + return 1 +} + +mnt_from_dev() { + local dev="$1" + + if [ -n "${dev}" ]; then + lsblk -o MOUNTPOINT -n "${dev}" + fi +} + +# Construct block device path and mountpoint from argument +if [ -b "${arg}" ]; then + dev="$(dev_from_arg "${arg}")" + mnt="$(mnt_from_dev "${dev}")" +else + dev="$(dev_from_mount "${arg}")" + mnt="${arg}" +fi +if [ ! -e "${dev}" ]; then + echo "${arg}: Not an ext[234] filesystem." + print_help + exitcode 16 +fi + +# Make sure this is an LVM device we can snapshot +lvm_vars="$(lvs --nameprefixes -o name,vgname,lv_role --noheadings "${dev}" 2> /dev/null)" +eval "${lvm_vars}" +if [ -z "${LVM2_VG_NAME}" ] || [ -z "${LVM2_LV_NAME}" ] || + echo "${LVM2_LV_ROLE}" | grep -q "snapshot"; then + echo "${arg}: Not connected to an LVM logical volume." + print_help + exitcode 16 +fi +start_time="$(date +'%Y%m%d%H%M%S')" +snap="${LVM2_LV_NAME}.e2scrub" +snap_dev="/dev/${LVM2_VG_NAME}/${snap}" + +teardown() { + # Remove the snapshot and keep retrying while the snapshot device + # still exists and lvremove exits with status 5. + # + # The lvremove status is saved in rc right after each call: inside + # the loop condition "$?" would hold the status of the preceding + # [ -e ] test (always 0 at that point), so the retry never ran. + local rc + + ${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}" + rc="$?" + while [ -e "${snap_dev}" ] && [ "${rc}" -eq "5" ]; do + sleep 0.5 + ${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}" + rc="$?" + done +} 
+ +check() { + # First we recover the journal, then we see if e2fsck tries any + # non-optimization repairs. If either of these two returns a + # non-zero status (errors fixed or remaining) then this fs is bad. + E2FSCK_FIXES_ONLY=1 + export E2FSCK_FIXES_ONLY + ${DBG} "@root_sbindir@/e2fsck" -E journal_only -p ${e2fsck_opts} "${snap_dev}" || return $? + ${DBG} "@root_sbindir@/e2fsck" -f -y ${e2fsck_opts} "${snap_dev}" +} + +mark_clean() { + ${DBG} "@root_sbindir@/tune2fs" -C 0 -T "${start_time}" "${dev}" +} + +mark_corrupt() { + ${DBG} "@root_sbindir@/tune2fs" -E force_fsck "${dev}" +} + +setup() { + # Try to remove snapshot for 30s, bail out if we can't remove it. + # rc holds the status of the most recent lvremove; "$?" cannot be + # read in the loop condition because the [ -e ] test overwrites it. + local rc + + lvremove_deadline="$(( $(date "+%s") + 30))" + ${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}" 2>/dev/null + rc="$?" + while [ -e "${snap_dev}" ] && [ "${rc}" -eq "5" ] && + [ "$(date "+%s")" -lt "${lvremove_deadline}" ]; do + sleep 0.5 + ${DBG} lvremove -f "${LVM2_VG_NAME}/${snap}" + rc="$?" + done + if [ -e "${snap_dev}" ]; then + echo "${arg}: e2scrub snapshot is in use, cannot check!" + return 1 + fi + # Create the snapshot, wait for device to appear. + ${DBG} lvcreate -s -L "${snap_size_mb}m" -n "${snap}" "${LVM2_VG_NAME}/${LVM2_LV_NAME}" + if [ $? -ne 0 ]; then + echo "${arg}: e2scrub snapshot FAILED, will not check!" + return 1 + fi + ${DBG} udevadm settle 2> /dev/null + return 0 +} + +if [ "${reap}" -gt 0 ]; then + if [ -e "${snap_dev}" ]; then + teardown 2> /dev/null + fi + exit 0 +fi +if ! setup; then + exitcode 8 +fi +trap "teardown; exit 1" EXIT INT QUIT TERM + +# Check and react +check +case "$?" in +"0") + # Clean check! + echo "${arg}: Scrub succeeded." + mark_clean + teardown + trap '' EXIT + + # Trim the free space, which requires the snapshot be deleted. + if [ "${fstrim}" -eq 1 ] && [ -d "${mnt}" ] && type fstrim > /dev/null 2>&1; then + echo "${arg}: Trimming free space." 
+ fstrim -v "${mnt}" + fi + + ret=0 + ;; +"8") + # Operational error, what now? + echo "${arg}: e2fsck operational error." + teardown + trap '' EXIT + ret=8 + ;; +*) + # fsck failed. Check if the snapshot is invalid; if so, make a + # note of that at the end of the log. This isn't necessarily a + # failure because the mounted fs could have overflowed the + # snapshot with regular disk writes /or/ our repair process + # could have done it by repairing too much. + # + # If it's really corrupt we ought to fsck at next boot. + is_invalid="$(lvs -o lv_snapshot_invalid --noheadings "${snap_dev}" | awk '{print $1}')" + if [ -n "${is_invalid}" ]; then + echo "${arg}: Scrub FAILED due to invalid snapshot." + ret=8 + else + echo "${arg}: Scrub FAILED due to corruption! Unmount and run e2fsck -y." + mark_corrupt + ret=6 + fi + teardown + trap '' EXIT + ;; +esac + +exitcode "${ret}" diff --git a/scrub/e2scrub.rules.in b/scrub/e2scrub.rules.in new file mode 100644 index 0000000..b6dc30b --- /dev/null +++ b/scrub/e2scrub.rules.in @@ -0,0 +1,2 @@ +# Try to hide our fsck snapshots from udev's /dev/disk linking... 
+ACTION=="add|change", ENV{DM_LV_NAME}=="*.e2scrub", ENV{UDISKS_IGNORE}="1", OPTIONS="link_priority=-100" diff --git a/scrub/e2scrub@.service.in b/scrub/e2scrub@.service.in new file mode 100644 index 0000000..496f894 --- /dev/null +++ b/scrub/e2scrub@.service.in @@ -0,0 +1,20 @@ +[Unit] +Description=Online ext4 Metadata Check for %I +OnFailure=e2scrub_fail@%i.service +Documentation=man:e2scrub(8) + +[Service] +Type=oneshot +WorkingDirectory=/ +PrivateNetwork=true +ProtectSystem=true +ProtectHome=read-only +PrivateTmp=yes +AmbientCapabilities=CAP_SYS_ADMIN CAP_SYS_RAWIO +NoNewPrivileges=yes +User=root +IOSchedulingClass=idle +CPUSchedulingPolicy=idle +Environment=SERVICE_MODE=1 +ExecStart=@root_sbindir@/e2scrub -t %I +SyslogIdentifier=%N diff --git a/scrub/e2scrub_all.8.in b/scrub/e2scrub_all.8.in new file mode 100644 index 0000000..99bdc0d --- /dev/null +++ b/scrub/e2scrub_all.8.in @@ -0,0 +1,47 @@ +.TH E2SCRUB_ALL 8 "@E2FSPROGS_MONTH@ @E2FSPROGS_YEAR@" "E2fsprogs version @E2FSPROGS_VERSION@" +.SH NAME +e2scrub_all - check all mounted ext[234] file systems for errors +.SH SYNOPSIS +.B +e2scrub_all [OPTION] +.SH DESCRIPTION +Searches the system for all LVM logical volumes containing an ext2, ext3, or +ext4 file system, and checks them for problems. +The checking is performed by invoking the +.B e2scrub +tool, which will look for corruptions. +Corrupt file systems will be tagged as having errors so that fsck will be +invoked before the next mount. +If no errors are encountered, +.B fstrim +will be called on the file system if it is mounted. +See the +.B e2scrub +manual page for more information about how the checking is performed. +.SH OPTIONS +.TP +\fB-n\fR +Print what commands +.B e2scrub_all +would execute to initiate the e2scrub operations. 
+(Note: these commands will not actually be executed; however, since +.B e2scrub_all +needs to run some additional, privileged commands to query the +system to determine which +.B e2scrub +commands would be executed, it still needs to be run as root.) +.TP +\fB-r\fR +Remove e2scrub snapshots but do not check anything. +.TP +\fB-A\fR +Scrub all ext[234] file systems even if they are not mounted. +.TP +\fB-V\fR +Print version information and exit. +.SH SEE ALSO +.BR e2scrub "(8)" +.SH AUTHOR +Darrick J. Wong <darrick.wong@oracle.com> +.SH COPYRIGHT +Copyright \[co]2018 Oracle. License is GPLv2+. <http://www.gnu.org/licenses/gpl-2.0.html> diff --git a/scrub/e2scrub_all.cron.in b/scrub/e2scrub_all.cron.in new file mode 100644 index 0000000..395fb2a --- /dev/null +++ b/scrub/e2scrub_all.cron.in @@ -0,0 +1,2 @@ +30 3 * * 0 root test -e /run/systemd/system || SERVICE_MODE=1 @pkglibdir@/e2scrub_all_cron +10 3 * * * root test -e /run/systemd/system || SERVICE_MODE=1 @root_sbindir@/e2scrub_all -A -r diff --git a/scrub/e2scrub_all.in b/scrub/e2scrub_all.in new file mode 100644 index 0000000..4288b96 --- /dev/null +++ b/scrub/e2scrub_all.in @@ -0,0 +1,185 @@ +#!/bin/bash + +# Copyright (C) 2018 Oracle. All Rights Reserved. +# +# Author: Darrick J. Wong <darrick.wong@oracle.com> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + +PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin + +if (( $EUID != 0 )); then + echo "e2scrub_all must be run as root" + exit 1 +fi + +periodic_e2scrub=0 +scrub_all=0 +snap_size_mb=256 +reap=0 +conffile="@root_sysconfdir@/e2scrub.conf" + +test -f "${conffile}" && . "${conffile}" + +scrub_args="" + +print_help() { + echo "Usage: $0 [OPTIONS]" + echo " -n: Show what commands e2scrub_all would execute." + echo " -r: Remove e2scrub snapshots." + echo " -A: Scrub all ext[234] filesystems even if not mounted." + echo " -V: Print version information and exit." +} + +print_version() { + echo "e2scrub_all @E2FSPROGS_VERSION@ (@E2FSPROGS_DATE@)" +} + +exitcode() { + ret="$1" + + # If we're being run as a service, the return code must fit the LSB + # init script action error guidelines, which is to say that we + # compress all errors to 1 ("generic or unspecified error", LSB 5.0 + # section 22.2) and hope the admin will scan the log for what + # actually happened. + + if [ -n "${SERVICE_MODE}" -a "${ret}" -ne 0 ]; then + test "${ret}" -ne 0 && ret=1 + fi + + exit "${ret}" +} + +while getopts "nrAV" opt; do + case "${opt}" in + "n") DBG="echo Would execute: " ;; + "r") scrub_args="${scrub_args} -r"; reap=1;; + "A") scrub_all=1;; + "V") print_version; exitcode 0;; + *) print_help; exitcode 2;; + esac +done +shift "$((OPTIND - 1))" + +# If we're in service mode and the service is not enabled via config file... +if [ -n "${SERVICE_MODE}" -a "${periodic_e2scrub}" -ne 1 ]; then + # ...don't start e2scrub processes. + if [ "${reap}" -eq 0 ]; then + exitcode 0 + fi + + # ...and if we don't see any leftover e2scrub snapshots, don't + # run the reaping process either, because lvs can be slow. + if ! 
readlink -q -s -e /dev/mapper/*.e2scrub* > /dev/null; then + exitcode 0 + fi +fi + +# close file descriptor 3 (from cron) since it causes lvm to kvetch +exec 3<&- + +# If some prerequisite packages are not installed, exit with a code +# indicating success to avoid spamming the sysadmin with fail messages +# when e2scrub_all is run out of cron or a systemd timer. + +if ! type mapfile >& /dev/null ; then + test -n "${SERVICE_MODE}" && exitcode 0 + echo "e2scrub_all: can't find mapfile --- is bash 4.xx installed?" + exitcode 1 +fi + +if ! type lsblk >& /dev/null ; then + test -n "${SERVICE_MODE}" && exitcode 0 + echo "e2scrub_all: can't find lsblk --- is util-linux installed?" + exitcode 1 +fi + +if ! type lvcreate >& /dev/null ; then + test -n "${SERVICE_MODE}" && exitcode 0 + echo "e2scrub_all: can't find lvcreate --- is lvm2 installed?" + exitcode 1 +fi + +# Find scrub targets, make sure we only do this once. +ls_scan_targets() { + local devices=$(lvs -o lv_path --noheadings -S "lv_active=active,lv_role=public,lv_role!=snapshot,vg_free>=${snap_size_mb}") + + if [ -z "$devices" ]; then + return 0; + fi + lsblk -o NAME,MOUNTPOINT,FSTYPE,TYPE -P -n -p $devices | \ + grep FSTYPE=\"ext\[234\]\" | grep TYPE=\"lvm\" | \ + while read vars ; do + eval "${vars}" + + if [ "${scrub_all}" -eq 1 ] || [ -n "${MOUNTPOINT}" ]; then + echo ${MOUNTPOINT:-${NAME}} + fi + done +} + +# Find leftover scrub snapshots +ls_reap_targets() { + lvs -o lv_path -S lv_role=snapshot -S lv_name=~\(e2scrub$\) \ + --noheadings | sed -e 's/.e2scrub$//' +} + +# Figure out what we're targeting +ls_targets() { + if [ "${reap}" -eq 1 ]; then + ls_reap_targets + else + ls_scan_targets + fi +} + +# systemd doesn't know to do path escaping on the instance variable we pass +# to the e2scrub service, which breaks things if there is a dash in the path +# name. Therefore, do the path escaping ourselves if needed. 
+# +# systemd path escaping also drops the initial slash so we add that back in so +# that log messages from the service units preserve the full path and users can +# look up log messages using full paths. However, for "/" the escaping rules +# do /not/ drop the initial slash, so we have to special-case that here. +escape_path_for_systemd() { + local path="$1" + + if [ "${path}" != "/" ]; then + echo "-$(systemd-escape --path "${path}")" + else + echo "-" + fi +} + +# Scrub any mounted fs on lvm by creating a snapshot and fscking that. +mapfile -t targets < <(ls_targets) +for tgt in "${targets[@]}"; do + # If we're not reaping and systemd is present, try invoking the + # systemd service. + if [ "${reap}" -ne 1 ] && type systemctl > /dev/null 2>&1; then + tgt_esc="$(escape_path_for_systemd "${tgt}")" + ${DBG} systemctl start "e2scrub@${tgt_esc}" 2> /dev/null + res=$? + if [ "${res}" -eq 0 ] || [ "${res}" -eq 1 ]; then + continue; + fi + fi + + # Otherwise use direct invocation + ${DBG} "@root_sbindir@/e2scrub" ${scrub_args} "${tgt}" +done + +exitcode 0 diff --git a/scrub/e2scrub_all.service.in b/scrub/e2scrub_all.service.in new file mode 100644 index 0000000..20f42bf --- /dev/null +++ b/scrub/e2scrub_all.service.in @@ -0,0 +1,12 @@ +[Unit] +Description=Online ext4 Metadata Check for All Filesystems +ConditionACPower=true +ConditionCapability=CAP_SYS_ADMIN +ConditionCapability=CAP_SYS_RAWIO +Documentation=man:e2scrub_all(8) + +[Service] +Type=oneshot +Environment=SERVICE_MODE=1 +ExecStart=@root_sbindir@/e2scrub_all +SyslogIdentifier=e2scrub_all diff --git a/scrub/e2scrub_all.timer.in b/scrub/e2scrub_all.timer.in new file mode 100644 index 0000000..3d558bb --- /dev/null +++ b/scrub/e2scrub_all.timer.in @@ -0,0 +1,11 @@ +[Unit] +Description=Periodic ext4 Online Metadata Check for All Filesystems + +[Timer] +# Run on Sunday at 3:10am, to avoid running afoul of DST changes +OnCalendar=Sun *-*-* 03:10:00 +RandomizedDelaySec=60 +Persistent=true + +[Install] 
+WantedBy=timers.target diff --git a/scrub/e2scrub_all_cron.in b/scrub/e2scrub_all_cron.in new file mode 100644 index 0000000..fcfe415 --- /dev/null +++ b/scrub/e2scrub_all_cron.in @@ -0,0 +1,69 @@ +#!/bin/bash + +# Copyright (C) 2018 Oracle. All Rights Reserved. +# +# Author: Darrick J. Wong <darrick.wong@oracle.com> +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + +# Run e2scrub_all from a cronjob if we don't have systemd and we are +# running on AC power (or cannot tell what the power source is). 
+ +on_ac_power() { + local any_known=no + + # try sysfs power class first + if [ -d /sys/class/power_supply ]; then + for psu in /sys/class/power_supply/*; do + if [ -r "$psu/type" ]; then + type=$(cat "$psu/type") + + # ignore batteries + [ "$type" = "Battery" ] && continue + + online=$(cat "$psu/online") + + [ "$online" = 1 ] && return 0 + [ "$online" = 0 ] && any_known=yes + fi + done + + [ "$any_known" = "yes" ] && return 1 + fi + + # else fall back to AC adapters in /proc + if [ -d /proc/acpi/ac_adapter ]; then + for ac in /proc/acpi/ac_adapter/*; do + if [ -r "$ac/state" ]; then + grep -q on-line "$ac/state" && return 0 + grep -q off-line "$ac/state" && any_known=yes + elif [ -r "$ac/status" ]; then + grep -q on-line "$ac/status" && return 0 + grep -q off-line "$ac/status" && any_known=yes + fi + done + + [ "$any_known" = "yes" ] && return 1 + fi + + # Can't tell, just assume we're on AC. + return 0 +} + +test -e @root_sbindir@/e2scrub_all || exit 0 +test -e /run/systemd/system && exit 0 +on_ac_power || exit 0 + +exec @root_sbindir@/e2scrub_all diff --git a/scrub/e2scrub_fail.in b/scrub/e2scrub_fail.in new file mode 100644 index 0000000..2c0754a --- /dev/null +++ b/scrub/e2scrub_fail.in @@ -0,0 +1,38 @@ +#!/bin/bash + +# Email logs of failed e2scrub unit runs when the systemd service fails. +# $1 is the device or mountpoint whose e2scrub@ unit failed; with no +# argument the script exits successfully without sending anything. + +device="$1" +test -z "${device}" && exit 0 + +if ! type sendmail > /dev/null 2>&1; then + echo "$0: sendmail program not found." + exit 1 +fi + +# Source the config file from the configured sysconfdir (the same path that +# e2scrub and e2scrub_all read) instead of a hard-coded /etc location. +if test -f "@root_sysconfdir@/e2scrub.conf" ; then + . "@root_sysconfdir@/e2scrub.conf" +fi + +hostname="$(hostname -f 2>/dev/null)" +test -z "${hostname}" && hostname="${HOSTNAME}" +# Quote the argument so a path containing whitespace is escaped as one +# unit name rather than being split into several arguments. +service_name="e2scrub@$(systemd-escape "${device}")" + +if test -z "${recipient}" ; then + recipient="root" +fi + +if test -z "${sender}" ; then + sender="<e2scrub@${hostname}>" +fi + +(cat << ENDL +To: ${recipient} +From: ${sender} +Subject: e2scrub failure on ${device} + +So sorry, the automatic e2scrub of ${device} on ${hostname} failed. 
+ +A log of what happened follows: +ENDL +systemctl status --full --lines 4294967295 "${service_name}") | sendmail -t -i diff --git a/scrub/e2scrub_fail@.service.in b/scrub/e2scrub_fail@.service.in new file mode 100644 index 0000000..4bad311 --- /dev/null +++ b/scrub/e2scrub_fail@.service.in @@ -0,0 +1,10 @@ +[Unit] +Description=Online ext4 Metadata Check Failure Reporting for %I +Documentation=man:e2scrub(8) + +[Service] +Type=oneshot +ExecStart=@pkglibdir@/e2scrub_fail "%I" +User=mail +Group=mail +SupplementaryGroups=systemd-journal diff --git a/scrub/e2scrub_reap.service.in b/scrub/e2scrub_reap.service.in new file mode 100644 index 0000000..58a4565 --- /dev/null +++ b/scrub/e2scrub_reap.service.in @@ -0,0 +1,25 @@ +[Unit] +Description=Remove Stale Online ext4 Metadata Check Snapshots +ConditionCapability=CAP_SYS_ADMIN +ConditionCapability=CAP_SYS_RAWIO +Documentation=man:e2scrub_all(8) + +[Service] +Type=oneshot +WorkingDirectory=/ +PrivateNetwork=true +ProtectSystem=true +ProtectHome=read-only +PrivateTmp=yes +AmbientCapabilities=CAP_SYS_ADMIN CAP_SYS_RAWIO +NoNewPrivileges=yes +User=root +IOSchedulingClass=idle +CPUSchedulingPolicy=idle +Environment=SERVICE_MODE=1 +ExecStart=@root_sbindir@/e2scrub_all -A -r +SyslogIdentifier=%N +RemainAfterExit=no + +[Install] +WantedBy=multi-user.target |