Diffstat (limited to 'debian/local')
-rw-r--r--   debian/local/apport/source_mdadm.py                60
-rwxr-xr-x   debian/local/bin/checkarray                       221
-rwxr-xr-x   debian/local/bin/mkconf                           104
-rw-r--r--   debian/local/doc/FAQ                              669
-rw-r--r--   debian/local/doc/README.checkarray                 33
-rw-r--r--   debian/local/doc/README.recipes                   168
-rwxr-xr-x   debian/local/initramfs-tools/local-block/mdadm     61
-rwxr-xr-x   debian/local/initramfs-tools/local-bottom/mdadm     3
-rwxr-xr-x   debian/local/reportbug/script                     219
9 files changed, 1538 insertions, 0 deletions
diff --git a/debian/local/apport/source_mdadm.py b/debian/local/apport/source_mdadm.py
new file mode 100644
index 0000000..0aad41b
--- /dev/null
+++ b/debian/local/apport/source_mdadm.py
@@ -0,0 +1,60 @@
+'''apport package hook for mdadm
+
+(c) 2009-2016 Canonical Ltd.
+Author: Steve Beattie <sbeattie@ubuntu.com>
+
+Based on the ideas in debian's /usr/share/bug/mdadm/script
+'''
+
+from apport.hookutils import attach_file, attach_file_if_exists, attach_hardware, path_to_key, command_output
+import os
+import re
+import glob
+import gzip
+import subprocess
+import sys
+
+
+def get_initrd_files(pattern):
+    '''Extract listing of files from the current initrd which match a regex.
+
+    pattern should be a "re" object.'''
+
+    (_, _, release, _, _) = os.uname()
+    try:
+        fd = gzip.GzipFile('/boot/initrd.img-' + release, 'rb')
+        # universal_newlines needs to be False here as we're passing
+        # binary data from gzip into cpio, which means we'll need to
+        # decode the bytes into strings later when reading the output
+        cpio = subprocess.Popen(['cpio', '-t'], close_fds=True, stderr=subprocess.STDOUT,
+                                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+                                universal_newlines=False)
+    except OSError as e:
+        return 'Error: ' + str(e)
+
+    out = cpio.communicate(fd.read())[0].decode(sys.stdout.encoding, errors='replace')
+    if cpio.returncode != 0:
+        return 'Error: command %s failed with exit code %i: %s' % (
+            'cpio', cpio.returncode, out)
+
+    lines = ''.join([l for l in out.splitlines(True) if pattern.search(l)])
+    return lines
+
+
+def add_info(report):
+    attach_hardware(report)
+    attach_file(report, '/proc/mounts', 'ProcMounts')
+    attach_file_if_exists(report, '/etc/mdadm/mdadm.conf', 'mdadm.conf')
+    attach_file(report, '/proc/mdstat', 'ProcMDstat')
+    attach_file(report, '/proc/partitions', 'ProcPartitions')
+    attach_file(report, '/etc/blkid.tab', 'etc.blkid.tab')
+    attach_file_if_exists(report, '/boot/grub/menu.lst', 'GrubMenu.lst')
+    attach_file_if_exists(report, '/boot/grub/grub.cfg', 'Grub.cfg')
+    attach_file_if_exists(report, '/etc/lilo.conf', 'lilo.conf')
+
+    devices = glob.glob("/dev/[hs]d*")
+    for dev in devices:
+        report['MDadmExamine' + path_to_key(dev)] = command_output(['/sbin/mdadm', '-E', dev])
+
+    initrd_re = re.compile('md[a/]')
+    report['initrd.files'] = get_initrd_files(initrd_re)
diff --git a/debian/local/bin/checkarray b/debian/local/bin/checkarray
new file mode 100755
index 0000000..2fb7ee7
--- /dev/null
+++ b/debian/local/bin/checkarray
@@ -0,0 +1,221 @@
+#!/bin/sh
+#
+# checkarray -- initiates a check run of an MD array's redundancy information.
+#
+# Copyright © martin f. krafft <madduck@debian.org>
+# distributed under the terms of the Artistic Licence 2.0
+#
+set -eu
+
+PROGNAME=${0##*/}
+
+about()
+{
+ echo "\
+$PROGNAME -- MD array (RAID) redundancy checker tool
+Copyright © martin f. krafft <madduck@debian.org>
+Released under the terms of the Artistic Licence 2.0"
+}
+
+usage()
+{
+ about
+ echo "
+Usage: $PROGNAME [options] [arrays]
+
+Valid options are:
+ -a|--all check all assembled arrays (ignores arrays in command line).
+ -s|--status print redundancy check status of devices.
+ -x|--cancel queue a request to cancel a running redundancy check.
+ -r|--repair repair instead of check
+ -i|--idle perform check in the lowest scheduling class (idle)
+ -l|--slow perform check in a lower-than-standard scheduling class
+ -f|--fast perform check in a higher-than-standard scheduling class
+ --realtime perform check in real-time scheduling class (DANGEROUS!)
+ -c|--cron honour AUTOCHECK setting in /etc/default/mdadm.
+ -q|--quiet suppress informational messages
+ (use twice to suppress error messages too).
+ -h|--help show this output.
+ -V|--version show version information.
+
+Examples:
+ $PROGNAME --all --idle
+ $PROGNAME --quiet /dev/md[123]
+ $PROGNAME -sa
+ $PROGNAME -x --all
+
+Devices can be specified in almost any format. The following are equivalent:
+ /dev/md0, md0, /dev/md/0, /sys/block/md0
+
+You can also monitor the progress of a check/repair via the /proc/mdstat file."
+}
+
+SHORTOPTS=achVqQsxrilf
+LONGOPTS=all,cron,help,version,quiet,real-quiet,status,cancel,repair,idle,slow,fast,realtime
+
+eval set -- $(getopt -o $SHORTOPTS -l $LONGOPTS -n $PROGNAME -- "$@")
+
+arrays=''
+cron=0
+all=0
+quiet=0
+status=0
+action=check
+ionice=
+
+for opt in $@; do
+ case "$opt" in
+ -a|--all) all=1;;
+ -s|--status) action=status;;
+ -x|--cancel) action=idle;;
+ -r|--repair) action=repair;;
+ -i|--idle) ionice=idle;;
+ -l|--slow) ionice=low;;
+ -f|--fast) ionice=high;;
+ --realtime) ionice=realtime;;
+ -c|--cron) cron=1;;
+ -q|--quiet) quiet=$(($quiet+1));;
+ -Q|--real-quiet) quiet=$(($quiet+2));; # for compatibility
+ -h|--help) usage; exit 0;;
+ -V|--version) about; exit 0;;
+ /dev/md/*|md/*) arrays="${arrays:+$arrays }md${opt#*md/}";;
+ /dev/md*|md*) arrays="${arrays:+$arrays }${opt#/dev/}";;
+ /sys/block/md*) arrays="${arrays:+$arrays }${opt#/sys/block/}";;
+ --) :;;
+ *) echo "$PROGNAME: E: invalid option: $opt. Try --help." >&2; exit 1;;
+ esac
+done
+
+is_true()
+{
+ case "${1:-}" in
+ [Yy]es|[Yy]|1|[Tt]rue|[Tt]) return 0;;
+ *) return 1;
+ esac
+}
+
+DEBIANCONFIG=/etc/default/mdadm
+[ -r $DEBIANCONFIG ] && . $DEBIANCONFIG
+if [ $cron = 1 ] && ! is_true ${AUTOCHECK:-false}; then
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: disabled in $DEBIANCONFIG ." >&2
+ exit 0
+fi
+
+if [ ! -f /proc/mdstat ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: E: MD subsystem not loaded, or /proc unavailable." >&2
+ exit 2
+fi
+
+if [ ! -d /sys/block ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: E: /sys filesystem not available." >&2
+ exit 7
+fi
+
+if [ -z "$(ls /sys/block/md* 2>/dev/null)" ]; then
+ if [ $quiet -lt 2 ] && [ $cron != 1 ]; then
+ echo "$PROGNAME: W: no active MD arrays found." >&2
+ echo "$PROGNAME: W: (maybe uninstall the mdadm package?)" >&2
+ fi
+ exit 0
+fi
+
+if [ -z "$(ls /sys/block/md*/md/level 2>/dev/null)" ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: E: kernel too old, no support for redundancy checks." >&2
+ exit 6
+fi
+
+if ! egrep -q '^raid([1456]|10)$' /sys/block/md*/md/level 2>/dev/null; then
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: no redundant arrays present; skipping checks..." >&2
+ exit 0
+fi
+
+if [ -z "$(ls /sys/block/md*/md/sync_action 2>/dev/null)" ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: E: no kernel support for redundancy checks." >&2
+ exit 3
+fi
+
+[ $all = 1 ] && arrays="$(ls -d1 /sys/block/md* | cut -d/ -f4)"
+
+for array in $arrays; do
+ MDBASE=/sys/block/$array/md
+
+ if [ ! -e $MDBASE/sync_action ]; then
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: skipping non-redundant array $array." >&2
+ continue
+ fi
+
+ cur_status="$(cat $MDBASE/sync_action)"
+
+ if [ $action = status ]; then
+ echo "$array: $cur_status"
+ continue
+ fi
+
+ if [ ! -w $MDBASE/sync_action ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: E: $MDBASE/sync_action not writeable." >&2
+ exit 4
+ fi
+
+ if [ "$(cat $MDBASE/array_state)" = 'read-auto' ]; then
+ [ $quiet -lt 1 ] && echo "$PROGNAME: W: array $array in auto-read-only state, skipping..." >&2
+ continue
+ fi
+
+ case "$action" in
+ idle)
+ echo $action > $MDBASE/sync_action
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: cancel request queued for array $array." >&2
+ ;;
+
+ check|repair)
+ if [ "$cur_status" != idle ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: W: array $array not idle, skipping..." >&2
+ continue
+ fi
+
+ # check whether the array was created recently and skip the check if so
+ created=$(mdadm --detail /dev/$array 2>/dev/null |
+ sed -n 's/.*Creation Time *://p' )
+ if [ -n "$created" ]; then
+ created=$(date +%s -d "$created" 2>/dev/null)
+ fi
+ if [ -n "$created" ]; then
+ now=$(date +%s)
+ if [ "$created" -lt "$now" -a \
+ "$created" -gt "$(($now - 14 * 24 * 60 * 60))" ]; then
+ [ $quiet -lt 2 ] && echo "$PROGNAME: I: array $array created recently, skipping..." >&2
+ continue
+ fi
+ fi
+
+ # queue the request for the array. The kernel makes sure that such requests
+ # are properly serialised so that concurrent checks do not thrash the devices.
+ echo $action > $MDBASE/sync_action
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: $action queued for array $array." >&2
+
+ case "$ionice" in
+ idle) ioarg='-c3'; renice=15;;
+ low) ioarg='-c2 -n7'; renice=5;;
+ high) ioarg='-c2 -n0'; renice=0;;
+ realtime) ioarg='-c1 -n4'; renice=-5;;
+ *) continue;;
+ esac
+
+ resync_pid= wait=5
+ while [ $wait -gt 0 ]; do
+ wait=$((wait - 1))
+ resync_pid=$(ps -ef | awk -v dev=$array 'BEGIN { pattern = "^\\[" dev "_resync]$" } $8 ~ pattern { print $2 }')
+ if [ -n "$resync_pid" ]; then
+ [ $quiet -lt 1 ] && echo "$PROGNAME: I: selecting $ionice I/O scheduling class and $renice niceness for resync of $array." >&2
+ ionice -p "$resync_pid" $ioarg 2>/dev/null || :
+ renice -n $renice -p "$resync_pid" 1>/dev/null 2>&1 || :
+ break
+ fi
+ sleep 1
+ done
+ ;;
+ esac
+
+done
+
+exit 0
diff --git a/debian/local/bin/mkconf b/debian/local/bin/mkconf
new file mode 100755
index 0000000..4dd09b1
--- /dev/null
+++ b/debian/local/bin/mkconf
@@ -0,0 +1,104 @@
+#!/bin/sh
+#
+# mkconf -- outputs valid mdadm.conf contents for the local system
+#
+# Copyright © martin f. krafft <madduck@madduck.net>
+# distributed under the terms of the Artistic Licence 2.0
+#
+set -eu
+
+ME="${0##*/}"
+MDADM=/sbin/mdadm
+DEBIANCONFIG=/etc/default/mdadm
+CONFIG=/etc/mdadm/mdadm.conf
+
+# initialise config variables in case the environment leaks
+MAILADDR= DEVICE= HOMEHOST= PROGRAM=
+
+test -r $DEBIANCONFIG && . $DEBIANCONFIG
+
+if [ -n "${MDADM_MAILADDR__:-}" ]; then
+ # honour MAILADDR from the environment (from postinst)
+ MAILADDR="$MDADM_MAILADDR__"
+else
+ # preserve existing MAILADDR
+ MAILADDR="$(sed -ne 's/^MAILADDR //p' $CONFIG 2>/dev/null)" || :
+fi
+
+# save existing values as defaults
+if [ -r "$CONFIG" ]; then
+ DEVICE="$(sed -ne 's/^DEVICE //p' $CONFIG)"
+ HOMEHOST="$(sed -ne 's/^HOMEHOST //p' $CONFIG)"
+ PROGRAM="$(sed -ne 's/^PROGRAM //p' $CONFIG)"
+fi
+
+[ "${1:-}" = force-generate ] && rm -f $CONFIG
+case "${1:-}" in
+ generate|force-generate)
+ [ -n "${2:-}" ] && CONFIG=$2
+ # only barf if the config file specifies anything other than MAILADDR
+ if egrep -qv '^(MAILADDR.*|#.*|)$' $CONFIG 2>/dev/null; then
+ echo "E: $ME: $CONFIG already exists." >&2
+ exit 255
+ fi
+
+ mkdir --parent ${CONFIG%/*}
+ exec >$CONFIG
+ ;;
+esac
+
+cat <<_eof
+# mdadm.conf
+#
+# !NB! Run update-initramfs -u after updating this file.
+# !NB! This will ensure that initramfs has an up-to-date copy.
+#
+# Please refer to mdadm.conf(5) for information about this file.
+#
+
+# by default (built-in), scan all partitions (/proc/partitions) and all
+# containers for MD superblocks. alternatively, specify devices to scan, using
+# wildcards if desired.
+#DEVICE ${DEVICE:-partitions containers}
+
+# automatically tag new arrays as belonging to the local system
+HOMEHOST ${HOMEHOST:-<system>}
+
+# instruct the monitoring daemon where to send mail alerts
+MAILADDR ${MAILADDR:-root}
+
+_eof
+
+if [ -n "${PROGRAM:-}" ]; then
+ cat <<-_eof
+ # program to run when mdadm monitor detects potentially interesting events
+ PROGRAM ${PROGRAM}
+
+ _eof
+fi
+
+error=0
+if [ ! -r /proc/mdstat ]; then
+ echo W: $ME: MD subsystem is not loaded, thus I cannot scan for arrays. >&2
+ error=1
+elif [ ! -r /proc/partitions ]; then
+ echo W: $ME: /proc/partitions cannot be read, thus I cannot scan for arrays. >&2
+ error=2
+else
+ echo "# definitions of existing MD arrays"
+ if $MDADM --examine --scan --config=partitions; then
+ echo
+ else
+ error=$(($? + 128))
+ echo W: $ME: failed to scan for partitions. >&2
+ echo "### WARNING: scan failed."
+ fi
+fi
+
+if [ -z "${SOURCE_DATE_EPOCH:-}" ]; then
+ echo "# This configuration was auto-generated on $(date -R) by mkconf"
+else
+ echo "# This configuration was auto-generated on $(date -R --utc -d@$SOURCE_DATE_EPOCH) by mkconf"
+fi
+
+exit $error
diff --git a/debian/local/doc/FAQ b/debian/local/doc/FAQ
new file mode 100644
index 0000000..40e0aba
--- /dev/null
+++ b/debian/local/doc/FAQ
@@ -0,0 +1,669 @@
+Frequently asked questions -- Debian mdadm
+==========================================
+
+Also see /usr/share/doc/mdadm/README.recipes.gz .
+
+The latest version of this FAQ is available here:
+ http://anonscm.debian.org/gitweb/?p=pkg-mdadm/mdadm.git;a=blob_plain;f=debian/FAQ;hb=HEAD
+
+0. What does MD stand for?
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+ MD is an abbreviation for "multiple device" (also often called "multi-
+ disk"). The Linux MD implementation implements various strategies for
+ combining multiple (typically but not necessarily physical) block devices
+ into single logical ones. The most common use case is commonly known as
+ "Software RAID". Linux supports RAID levels 1, 4, 5, 6 and 10 as well
+ as the "pseudo" RAID level 0.
+ In addition, the MD implementation covers linear and multipath
+ configurations.
+
+ Most people refer to MD as RAID. Since the original name of the RAID
+ configuration software is "md"adm, I chose to use MD consistently instead.
+
+1. How do I overwrite ("zero") the superblock?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --zero-superblock /dev/sdXY
+
+ Note that this is a destructive operation. It does not actually delete any
+ data, but the device will have lost its "authority". You cannot assemble the
+ array with it anymore and if you add the device to another array, the
+ synchronisation process *will* *overwrite* all data on the device.
+
+ Nevertheless, sometimes it is necessary to zero the superblock:
+
+ - If you want to re-use a device (e.g. an HDD or SSD) that has been part of an
+ array (with a different superblock version and/or location) in another one.
+ In this case you zero the superblock before you assemble the array or add
+ the device to a new array.
+
+ - If you are trying to prevent a device from being recognised as part of an
+ array. Say for instance you are trying to change an array spanning sd[ab]1
+ to sd[bc]1 (maybe because sda is failing or too slow), then automatic
+ (scan) assembly will still recognise sda1 as a valid device. You can limit
+ the devices to scan with the DEVICE keyword in the configuration file, but
+ this may not be what you want. Instead, zeroing the superblock will
+ (permanently) prevent a device from being considered as part of an array.
+
+ WARNING: Depending on which superblock version you use, it won't work to just
+ overwrite the first few MiBs of the block device with 0x0 (e.g. via
+ dd), since the superblock may be at other locations (especially the
+ end of the device).
+ Therefore always use mdadm --zero-superblock .
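+
+ For example, to re-use a former member device in a different array (the
+ device names below are illustrative):
+
+   mdadm --zero-superblock /dev/sdc1   # wipe the old array membership
+   mdadm --add /dev/md0 /dev/sdc1      # the device is now treated as new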
+
+2. How do I change the preferred minor of an MD array (RAID)?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ See item 12 in /usr/share/doc/mdadm/README.recipes.gz and read the mdadm(8)
+ manpage (search for 'preferred').
+
+3. How does mdadm determine which /dev/mdX or /dev/md/X to use?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The logic used by mdadm to determine the device node name in the mdadm
+ --examine output (which is used to generate mdadm.conf) depends on several
+ factors. Here's how mdadm determines it:
+
+ It first checks the superblock version of a given array (or each array in
+ turn when iterating all of them). Run
+
+ mdadm --detail /dev/mdX | sed -ne 's,.*Version : ,,p'
+
+ to determine the superblock version of a running array, or
+
+ mdadm --examine /dev/sdXY | sed -ne 's,.*Version : ,,p'
+
+ to determine the superblock version from a component device of an array.
+
+ Version 0 superblocks (00.90.XX)
+ ''''''''''''''''''''''''''''''''
+ You need to know the preferred minor number stored in the superblock,
+ so run either of
+
+ mdadm --detail /dev/mdX | sed -ne 's,.*Preferred Minor : ,,p'
+ mdadm --examine /dev/sdXY | sed -ne 's,.*Preferred Minor : ,,p'
+
+ Let's call the resulting number MINOR. Also see FAQ 2 further up.
+
+ Given MINOR, mdadm will output /dev/md<MINOR> if the device node
+ /dev/md<MINOR> exists.
+ Otherwise, it outputs /dev/md/<MINOR>
+
+ Version 1 superblocks (01.XX.XX)
+ ''''''''''''''''''''''''''''''''
+ Version 1 superblocks actually seem to ignore preferred minors and instead
+ use the value of the name field in the superblock. Unless specified
+ explicitly during creation (-N|--name) the name is determined from the
+ device name used, using the following regexp: 's,/dev/md/?(.*),$1,', thus:
+
+ /dev/md0 -> 0
+ /dev/md/0 -> 0
+ /dev/md_d0 -> _d0 (d0 in later versions)
+ /dev/md/d0 -> d0
+ /dev/md/name -> name
+ (/dev/name does not seem to work)
+
+ mdadm will append the name to '/dev/md/', so it will always output device
+ names under the /dev/md/ directory. Newer versions can create a symlink
+ from /dev/mdX. See the symlinks option in mdadm.conf(5) and mdadm(8).
+
+ If you want to change the name, you can do so during assembly:
+
+ mdadm -A -U name -N newname /dev/mdX /dev/sd[abc]X
+
+ I know this all sounds inconsistent and upstream has some work to do.
+ We're on it.
+
+4. Which RAID level should I use?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Many people seem to prefer RAID4/5/6 because it makes more efficient use of
+ space. For example, if you have devices of size X, then in order to get 2X
+ storage, you need 3 devices for RAID5, but 4 if you use RAID10 or RAID1+0 (or
+ RAID6).
+
+ This gain in usable space comes at a price: performance; RAID1/10 can be up
+ to four times faster than RAID4/5/6.
+
+ At the same time, however, RAID4/5/6 provide somewhat better redundancy in
+ the event of two failing devices. In a RAID10 configuration with one device
+ already dead, the array survives a second failure only if it hits one of the
+ two devices in the other RAID1 pair, not the remaining device of the degraded
+ pair (see next item, 4b). A RAID6 across four devices can cope with any two
+ devices failing. However, RAID6 is noticeably slower than RAID5. RAID5 and
+ RAID4 do not differ much, but can only handle single-device failures.
+
+ If you can afford the extra devices (storage *is* cheap these days), I suggest
+ RAID1/10 over RAID4/5/6. If you don't care about performance but need as
+ much space as possible, go with RAID4/5/6, but make sure to have backups.
+ Heck, make sure to have backups whatever you do.
+
+ Let it be said, however, that I thoroughly regret putting my primary
+ workstation on RAID5. Anything device-intensive brings the system to its
+ knees; I will have to migrate to RAID10 at some point.
+
+ Please also consult /usr/share/doc/mdadm/RAID5_versus_RAID10.txt.gz,
+ https://en.wikipedia.org/wiki/Standard_RAID_levels and perhaps even
+ https://en.wikipedia.org/wiki/Non-standard_RAID_levels .
+
+4b. Can a 4-device RAID10 survive two device failures?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ I am assuming that you are talking about a setup with two copies of each
+ block, so --layout=near2/far2/offset2:
+
+ In two thirds of the cases, yes[0], and it does not matter which layout you
+ use. When you assemble 4 devices into a RAID10, you essentially stripe a RAID0
+ across two RAID1, so the four devices A,B,C,D become two pairs: A,B and C,D.
+ If A fails, the RAID10 can only survive if the second failing device is either
+ C or D; if B fails, your array is dead.
+
+ Thus, if you see a device failing, replace it as soon as possible!
+
+ If you need to handle two failing devices out of a set of four, you have to
+ use RAID6, or store more than two copies of each block (see the --layout
+ option in the mdadm(8) manpage).
+
+ See also question 18 further down.
+
+ [0] It's actually (n-2)/(n-1), where n is the number of devices. I am not
+ a mathematician, see http://aput.net/~jheiss/raid10/, which gives the
+ chance of *failure* as 1/(n-1), so the chance of success is 1-1/(n-1), or
+ (n-2)/(n-1), or 2/3 in the four device example.
+ (Thanks to Per Olofsson for clarifying this in #493577).
+
+5. How to convert RAID5 to RAID10?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ To convert 3 device RAID5 to RAID10, you need a spare device (either a hot
+ spare, fourth device in the array, or a new one). Then you remove the spare
+ and one of the three devices from the RAID5, create a degraded RAID10 across
+ them, create the filesystem and copy the data (or do a raw copy), then add the
+ other two devices to the new RAID10. However, mdadm cannot assemble a RAID10
+ with 50% missing devices the way you might like it:
+
+ mdadm --create -l 10 -n4 -pn2 /dev/md1 /dev/sd[cd] missing missing
+
+ For reasons that may be answered by question 20 further down, mdadm actually
+ cares about the order of devices you give it. If you intersperse the "missing"
+ keywords with the physical devices, it should work:
+
+ mdadm --create -l 10 -n4 -pn2 /dev/md1 /dev/sdc missing /dev/sdd missing
+
+ or even
+
+ mdadm --create -l 10 -n4 -pn2 /dev/md1 missing /dev/sd[cd] missing
+
+ Also see item (4b) further up, and this thread:
+ http://thread.gmane.org/gmane.linux.raid/13469/focus=13472
+
+6. What is the difference between RAID1+0 and RAID10?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ RAID1+0 is a form of RAID in which a RAID0 is striped across two RAID1
+ arrays. To assemble it, you create two RAID1 arrays and then create a RAID0
+ array with the two md arrays.
+
+ The Linux kernel provides the RAID10 level to do pretty much exactly the
+ same for you, but with greater flexibility (and somewhat improved
+ performance). While RAID1+0 makes sense with 4 devices, RAID10 can be
+ configured to work with only 3 devices. Also, RAID10 has a little less
+ overhead than RAID1+0, which has data pass the md layer twice.
+
+ I prefer RAID10 over RAID1+0.
+
+6b. What's the difference between RAID1+0 and RAID0+1?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ In short: RAID1+0 stripes across two mirrored arrays, while RAID0+1 mirrors
+ two striped arrays. However, the two terms are also often switched.
+
+ The linux MD driver supports RAID10, which is equivalent to the above
+ RAID1+0 definition.
+
+ RAID1+0/10 has a greater chance to survive two device failures, its
+ performance suffers less when in degraded state, and it resyncs faster after
+ replacing a failed device.
+
+ See http://aput.net/~jheiss/raid10/ for more details.
+
+7. Which RAID10 layout scheme should I use?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ RAID10 gives you the choice between three ways of laying out chunks on the
+ devices: near, far and offset.
+
+ The examples below explain the chunk distribution for each of these layouts
+ with 2 copies per chunk, using either an even number of devices (here: 4)
+ or an odd number (here: 5).
+
+ For simplicity we assume that the chunk size matches the block size of the
+ underlying devices and also the RAID10 device exported by the kernel
+ (e.g. /dev/md/name). The chunk numbers map therefore directly to the block
+ addresses in the exported RAID10 device.
+
+ The decimal numbers below (0, 1, 2, …) are the RAID10 chunks. Due to the
+ foregoing assumption they are also the block addresses in the exported RAID10
+ device. Identical numbers refer to copies of a chunk or block, but on different
+ underlying devices. The hexadecimal numbers (0x00, 0x01, 0x02, …) refer to the
+ block addresses in the underlying devices.
+
+ "near" layout with 2 copies per chunk (--layout=n2):
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The chunk copies are placed "as close to each other as possible".
+
+ With an even number of devices, they lie at the same offset on each device.
+ It is a classic RAID1+0 setup, i.e. two groups of mirrored devices, with both
+ forming a striped RAID0.
+
+ device1 device2 device3 device4 device1 device2 device3 device4 device5
+ ─────── ─────── ─────── ─────── ─────── ─────── ─────── ─────── ───────
+ 0 0 1 1 0x00 0 0 1 1 2
+ 2 2 3 3 0x01 2 3 3 4 4
+ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯
+ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮
+ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯
+ 254 254 255 255 0x80 317 318 318 319 319
+ ╰──────┬──────╯ ╰──────┬──────╯
+ RAID1 RAID1
+ ╰──────────────┬──────────────╯
+ RAID0
+
+ "far" layout with 2 copies per chunk (--layout=f2):
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The chunk copies are placed "as far from each other as possible".
+
+ Here, a complete sequence of chunks is striped over all devices. Then a second
+ sequence of chunks is placed next to them. Further copies follow in the same
+ way if the number of copies (the 2 in f2) is raised.
+
+ It is undesirable, however, to place copies of the same chunks on the same
+ devices. That is prevented by a cyclic permutation of each such stripe.
+
+ device1 device2 device3 device4 device1 device2 device3 device4 device5
+ ─────── ─────── ─────── ─────── ─────── ─────── ─────── ─────── ───────
+ 0 1 2 3 0x00 0 1 2 3 4 ╮
+ 4 5 6 7 0x01 5 6 7 8 9 ├ ▒
+ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ┆
+ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ┆
+ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ┆
+ 252 253 254 255 0x40 315 316 317 318 319 ╯
+ 3 0 1 2 0x41 4 0 1 2 3 ╮
+ 7 4 5 6 0x42 9 5 6 7 8 ├ ▒ₚ
+ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ┆
+ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ┆
+ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ┆
+ 255 252 253 254 0x80 319 315 316 317 318 ╯
+
+ Each ▒ in the diagram represents a complete sequence of chunks. ▒ₚ is a cyclic
+ permutation.
+
+ A major advantage of the "far" layout is that sequential reads can be spread
+ out over different devices, which makes the setup similar to RAID0 in terms of
+ speed. Writes, however, pay a seek penalty and are substantially slower.
+
+ "offset" layout with 2 copies per chunk (--layout=o2):
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Here, a number of consecutive chunks are bundled on each device during the
+ striping operation. The number of consecutive chunks equals the number of
+ devices. Next, a copy of the same chunks is striped in a different pattern.
+ Further copies follow in the same way if the number of copies (the 2 in o2)
+ is raised.
+
+ A cyclic permutation in the pattern prevents copies of the same chunks
+ landing on the same devices.
+
+ device1 device2 device3 device4 device1 device2 device3 device4 device5
+ ─────── ─────── ─────── ─────── ─────── ─────── ─────── ─────── ───────
+ 0 1 2 3 0x00 0 1 2 3 4 ) AA
+ 3 0 1 2 0x01 4 0 1 2 3 ) AAₚ
+ 4 5 6 7 0x02 5 6 7 8 9 ) AB
+ 7 4 5 6 0x03 9 5 6 7 8 ) ABₚ
+ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ) ⋯
+ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮ ⋮
+ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ⋯ ) ⋯
+ 251 252 253 254 0x79 314 315 316 317 318 ) EX
+ 254 251 252 253 0x80 318 314 315 316 317 ) EXₚ
+
+ With AA, AB, …, AZ, BA, … being the sets of consecutive chunks and
+ AAₚ, ABₚ, …, AZₚ, BAₚ, … their cyclic permutations.
+
+ The read characteristics are probably similar to the "far" layout when a
+ suitably large chunk size is chosen, but with less seeking for writes.
+
+ Upstream and the Debian maintainer do not understand all the nuances and
+ implications. The "offset" layout was only added because the Common
+ RAID Disk Data Format (DDF) supports it, and standard compliance is our
+ goal.
+
+ See the md(4) manpage for more details.
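+
+ For reference, the three layouts map to mdadm invocations such as these
+ (device names and counts are illustrative):
+
+   mdadm --create /dev/md0 -l10 -n4 --layout=n2 /dev/sd[abcd]1   # near
+   mdadm --create /dev/md0 -l10 -n4 --layout=f2 /dev/sd[abcd]1   # far
+   mdadm --create /dev/md0 -l10 -n4 --layout=o2 /dev/sd[abcd]1   # offset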
+
+8. (One of) my RAID arrays is busy and cannot be stopped. What gives?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ It is perfectly normal for mdadm to report the array with the root
+ filesystem to be busy on shutdown. The reason for this is that the root
+ filesystem must be mounted to be able to stop the array (or otherwise
+ /sbin/mdadm does not exist), but to stop the array, the root filesystem
+ cannot be mounted. Catch 22. The kernel actually stops the array just before
+ halting, so all is well.
+
+ If mdadm cannot stop other arrays on your system, check that these arrays
+ aren't used anymore. Common causes for busy/locked arrays are:
+
+ * The array contains a mounted filesystem (check the `mount' output)
+ * The array is used as a swap backend (check /proc/swaps)
+ * The array is used by the device-mapper (check with `dmsetup')
+ * LVM
+ * dm-crypt
+ * EVMS
+ * The array contains a swap partition used for suspend-to-ram
+ (check /etc/initramfs-tools/conf.d/resume)
+ * The array is used by a process (check with `lsof')
+
+9. Should I use RAID0 (or linear)?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ No. Unless you know what you're doing and keep backups, or use it for data
+ that can be lost.
+
+9b. Why not?
+~~~~~~~~~~~~
+ RAID0 has zero redundancy. If you stripe a RAID0 across X devices, you
+ increase the likelihood of complete loss of the filesystem by a factor of X.
+
+ The same applies to LVM by the way (when LVs are placed over X PVs).
+
+ If you want to or must use LVM or RAID0, stripe it across RAID1 arrays
+ (RAID10/RAID1+0, or LVM on RAID1), and keep backups!
+
+10. Can I cancel a running array check (checkarray)?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ See the -x option in the `/usr/share/mdadm/checkarray --help` output.
+
+11. mdadm warns about duplicate/similar superblocks; what gives?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ In certain configurations, especially if your last partition extends all the
+ way to the end of the device, mdadm may display a warning like:
+
+ mdadm: WARNING /dev/sdXY and /dev/sdX appear to have very similar
+ superblocks. If they are really different, please --zero the superblock on
+ one. If they are the same or overlap, please remove one from the DEVICE
+ list in mdadm.conf.
+
+ There are two ways to solve this:
+
+ (a) recreate the arrays with version-1 superblocks, which is not always an
+ option -- you cannot yet upgrade version-0 to version-1 superblocks for
+ existing arrays.
+
+ (b) instead of 'DEVICE partitions', list exactly those devices that are
+ components of MD arrays on your system. So instead of:
+
+ DEVICE partitions
+
+ for example:
+
+ DEVICE /dev/sd[ab]* /dev/sdc[123]
+
+12. mdadm -E / mkconf report different arrays with the same device
+ name / minor number. What gives?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ In almost all cases, mdadm updates the super-minor field in an array's
+ superblock when assembling the array. It does *not* do this for RAID0
+ arrays. Thus, you may end up seeing something like this when you run
+ mdadm -E or mkconf:
+
+ ARRAY /dev/md0 level=raid0 num-devices=2 UUID=abcd...
+ ARRAY /dev/md0 level=raid1 num-devices=2 UUID=dcba...
+
+ Note how the two arrays have different UUIDs but both appear as /dev/md0.
+
+ The solution in this case is to explicitly tell mdadm to update the
+ superblock of the RAID0 array. Assuming that the RAID0 array in the above
+ example should really be /dev/md1:
+
+ mdadm --stop /dev/md1
+ mdadm --assemble --update=super-minor --uuid=abcd... /dev/md1
+
+ See question 2 of this FAQ, and also http://bugs.debian.org/386315 and
+ recipe #12 in README.recipes .
+
+13. Can a MD array be partitioned?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Since kernel 2.6.28, MD arrays can be partitioned like any other block
+ device.
+
+ Prior to 2.6.28, for a MD array to be able to hold partitions, it must be
+ created as a "partitionable array", using the configuration auto=part on the
+ command line or in the configuration file, or by using the standard naming
+ scheme (md_d* or md/d*) for partitionable arrays:
+
+ mdadm --create --auto=yes ... /dev/md_d0 ...
+ # see mdadm(8) manpage about the values of the --auto keyword
+
+14. When would I partition an array?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ This answer by Doug Ledford is shamelessly adapted from [0] (with
+ permission):
+
+ First, not all MD types make sense to be split up, e.g. multipath. For
+ those types, when a device fails, the *entire* device is considered to have
+ failed, but with different arrays you won't switch over to the next path
+ until each MD array has attempted to access the bad path. This can have
+ obvious bad consequences for certain array types that do automatic
+ failover from one port to another (you can end up getting the array in
+ a loop of switching ports repeatedly to satisfy the fact that one array
+ failed over during a path down, then the path came back up, and another
+ array stayed on the old path because it didn't send any commands during
+ the path down time period).
+
+ Second, convenience. Assume you have a 6 device RAID5 array. If a device
+ fails and you are using a partitioned MD array, then all the partitions on
+ the device will already be handled without using that device. No need to
+ manually fail any still active array members from other arrays.
+
+ Third, safety. Again with the RAID5 array. If you use multiple arrays on
+ a single device, and that device fails, but it only failed on one array, then
+ you now need to manually fail that device from the other arrays before
+ shutting down or hot swapping the device. Generally speaking, that's not
+ a big deal, but people do occasionally have fat finger syndrome and this
+ is a good opportunity for someone to accidentally fail the wrong device, and
+ when you then go to remove the device you create a two device failure instead
+ of one and now you are in real trouble.
+
+ Fourth, to respond to what you wrote about the arrays being independent of each other --
+ part of the reason why you partition. I would argue that's not true. If
+ your goal is to salvage as much use from a failing device as possible, then
+ OK. But, generally speaking, people that have something of value on their
+ devices don't want to salvage any part of a failing device, they want that
+ device gone and replaced immediately. There simply is little to no value in
+ an already malfunctioning device. They're too cheap and the data stored on
+ them too valuable to risk losing something in an effort to further
+ utilize broken hardware. This of course is written with the understanding
+ that the latest MD RAID code will do read error rewrites to compensate for
+ minor device issues, so anything that will throw a device out of an array is
+ more than just a minor sector glitch.
+
+ [0] http://thread.gmane.org/gmane.linux.raid/13594/focus=13597
+
+15. How can I start a dirty degraded array?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ A degraded array (e.g. a RAID5 with only two devices) that has not been
+ properly stopped cannot be assembled just like that; mdadm will refuse and
+ complain about a "dirty degraded array", for good reasons.
+
+ The solution might be to force-assemble it, and then to start it. Please see
+ recipes 4 and 4b of /usr/share/doc/mdadm/README.recipes.gz and make sure you
+ know what you're doing.
+
+16. How can I influence the speed with which an array is resynchronised?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ For each array, the MD subsystem exports parameters governing the
+ synchronisation speed via sysfs. The values are in kB/sec.
+
+ /sys/block/mdX/md/sync_speed -- the current speed
+ /sys/block/mdX/md/sync_speed_max -- the maximum speed
+ /sys/block/mdX/md/sync_speed_min -- the guaranteed minimum speed
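+
+ For example, to cap resynchronisation of md0 at roughly 50 MB/s and to
+ guarantee at least 10 MB/s (array name and values are illustrative):
+
+   echo 50000 > /sys/block/md0/md/sync_speed_max
+   echo 10000 > /sys/block/md0/md/sync_speed_min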
+
+17. When I create a new array, why does it resynchronise at first?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ See the mdadm(8) manpage:
+ When creating a RAID5 array, mdadm will automatically create a degraded
+ array with an extra spare drive. This is because building the spare into
+ a degraded array is in general faster than resyncing the parity on
+ a non-degraded, but not clean, array. This feature can be over-ridden with
+ the --force option.
+
+ This also applies to RAID levels 4 and 6.
+
+ It does not make much sense for RAID levels 1 and 10 and can thus be
+ overridden with the --force and --assume-clean options, but it is not
+ recommended. Read the manpage.
+
+18. How many failed devices can a RAID10 handle?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ (see also question 4b)
+
+ The following table shows how many devices you can lose and still have an
+ operational array. In some cases, you *can* lose more than the given number
+ of devices, but there is no guarantee that the array survives. Thus, the
+ following is the guaranteed number of failed devices a RAID10 array survives
+ and the maximum number of failed devices the array can (but is not guaranteed
+ to) handle, given the number of devices used and the number of data block
+ copies. Note that 2 copies means original + 1 copy. Thus, if you only have
+ one copy (the original), you cannot handle any failures.
+
+          1     2     3     4    (# of copies)
+     1   0/0   0/0   0/0   0/0
+     2   0/0   1/1   1/1   1/1
+     3   0/0   1/1   2/2   2/2
+     4   0/0   1/2   2/2   3/3
+     5   0/0   1/2   2/2   3/3
+     6   0/0   1/3   2/3   3/3
+     7   0/0   1/3   2/3   3/3
+     8   0/0   1/4   2/3   3/4
+   (# of devices)
+
+ Note: I have not really verified the above information. Please don't count
+ on it. If a device fails, replace it as soon as possible. Corrections welcome.
+
+19. What should I do if a device fails?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Replace it as soon as possible.
+
+ In case of physical devices with no hot-swap capabilities, for example via:
+
+ mdadm --remove /dev/md0 /dev/sda1
+ poweroff
+ <replace device and start the machine>
+ mdadm --add /dev/md0 /dev/sda1
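+
+ With hot-swap capable hardware the same replacement can be done without
+ powering off, along these lines (device names are illustrative; the --fail
+ step is only needed if the device has not already been marked failed):
+
+   mdadm --fail /dev/md0 /dev/sda1
+   mdadm --remove /dev/md0 /dev/sda1
+   <swap the physical device>
+   mdadm --add /dev/md0 /dev/sda1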
+
+20. So how do I find out which other device(s) can fail without killing the
+ array?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Did you read the previous question and its answer?
+
+ For cases when you have two copies of each block, the question is easily
+ answered by looking at the output of /proc/mdstat. For instance on a 4 device
+ array:
+
+ md3 : active raid10 sdg7[3] sde7[0] sdh7[2] sdf7[1]
+
+ you know that sde7/sdf7 form one pair and sdg7/sdh7 the other.
+
+ If sdh now fails, this will become
+
+ md3 : active raid10 sdg7[3] sde7[0] sdh7[4](F) sdf7[1]
+
+ So now the second pair is degraded; the array could take another failure in
+ the first pair, but if sdg now also fails, you're history.
+
+ Now go and read question 19.
+
+ For cases with more copies per block, it becomes more complicated. Let's
+ think of a 7 device array with three copies:
+
+ md5 : active raid10 sdg7[6] sde7[4] sdb7[5] sdf7[2] sda7[3] sdc7[1] sdd7[0]
+
+ Each mirror now has 7/3 = 2.33 devices to it, so in order to determine groups,
+ you need to round up. Note how the devices are arranged in increasing order of
+ their indices (the number in brackets in /proc/mdstat):
+
+ device: -sdd7- -sdc7- -sdf7- -sda7- -sde7- -sdb7- -sdg7-
+ group: [ one ][ two ][ three ]
+
+ Basically this means that after two devices failed, you need to make sure that
+ the third failed device doesn't destroy all copies of any given block. And
+ that's not always easy as it depends on the layout chosen: whether the
+ blocks are near (same offset within each group), far (spread apart in a way
+ to maximise the mean distance), or offset (offset by size/n within each
+ block).
+
+ I'll leave it up to you to figure things out. Now go read question 19.
+
+21. Why does the kernel speak of 'resync' when using checkarray?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Please see README.checkarray and http://thread.gmane.org/gmane.linux.raid/11864 .
+
+ In short: it's a bug. checkarray is actually not a resync, but the kernel
+ does not distinguish between them.
+
+22. Can I prioritise the sync process and sync certain arrays before others?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Upon start, md will resynchronise any unclean arrays, starting in somewhat
+ random order. Sometimes it's desirable to sync e.g. /dev/md3 first (because
+ it's the most important), but while /dev/md1 is synchronising, /dev/md3 will
+ be DELAYED (see /proc/mdstat); this only happens if they share the same
+ physical components.
+
+ It is possible to delay the synchronisation via /sys:
+
+ echo idle >/sys/block/md1/md/sync_action
+
+ This will cause md1 to go idle and MD to synchronise md3 (or whatever is
+ queued next; repeat the above for other devices if necessary). MD will also
+ realise that md1 is still not in sync and queue it for resynchronisation,
+ so it will sync automatically when its turn has come.
+
+23. mdadm's init script fails because it cannot find any arrays. What gives?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ This does not happen anymore: if no arrays are present in the config file,
+ no arrays will be started.
+
+24. What happened to mdrun? How do I replace it?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdrun used to be the sledgehammer approach to assembling arrays. It has
+ accumulated several problems over the years (e.g. Debian bug #354705) and
+ thus has been deprecated and removed with the 2.6.7-2 version of this package.
+
+ If you are still using mdrun, please ensure that you have a valid
+ /etc/mdadm/mdadm.conf file (run /usr/share/mdadm/mkconf --generate to get
+ one), and run
+
+ mdadm --assemble --scan --auto=yes
+
+ instead of mdrun.
+
+25. Why are my arrays marked auto-read-only in /proc/mdstat?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Arrays are kept read-only until the first write occurs. This allows md to
+ skip lengthy resynchronisation for arrays that have not been properly shut
+ down, but which also have not changed.
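+
+ If you want to force the transition yourself (for example before running
+ a check), you can mark the array writable; a minimal example:
+
+   mdadm --readwrite /dev/md0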
+
+26. Why doesn't mdadm find the arrays specified in the config file, causing
+ the boot to fail?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ My boot process dies at an early stage and drops me into the busybox shell.
+ The last relevant output seems to be from mdadm and is something like
+
+ "/dev/md2 does not exist"
+
+ or
+
+ "No devices listed in conf file found"
+
+ Why does mdadm break my system?
+
+ Short answer: It doesn't; the underlying devices aren't available yet
+ when mdadm runs during the early boot process.
+
+ Long answer: It doesn't, but the drivers of those devices incorrectly
+ communicate to the kernel that the devices are ready, when in fact they are
+ not. I consider this a bug in those drivers. Please consider reporting it.
+
+ Workaround: there is nothing mdadm can or will do against this. Fortunately
+ though, initramfs provides a method, documented at
+ http://wiki.debian.org/InitramfsDebug. Please append rootdelay=10 (which sets
+ a delay of 10 seconds before trying to mount the root filesystem) to the
+ kernel command line and check whether the boot now works.
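+
+ With GRUB 2 this can be made persistent roughly as follows (a sketch; the
+ file location and the existing options depend on your setup):
+
+   # /etc/default/grub
+   GRUB_CMDLINE_LINUX_DEFAULT="quiet rootdelay=10"
+   # then regenerate the boot configuration:
+   update-grub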
+
+ -- martin f. krafft <madduck@debian.org> Wed, 13 May 2009 09:59:53 +0200
diff --git a/debian/local/doc/README.checkarray b/debian/local/doc/README.checkarray
new file mode 100644
index 0000000..8071a4d
--- /dev/null
+++ b/debian/local/doc/README.checkarray
@@ -0,0 +1,33 @@
+checkarray notes
+================
+
+checkarray will run parity checks across all your redundant arrays. By
+default, it is configured to run on the first Sunday of each month, at 01:06
+in the morning. This is realised by asking cron to wake up every Sunday with
+/etc/cron.d/mdadm, but then only running the script when the day of the month
+is less than or equal to 7. See #380425.
+
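+An entry of roughly the following shape implements that schedule (this is an
+illustration only; the file shipped as /etc/cron.d/mdadm is authoritative):
+
+  6 1 * * 0 root [ -x /usr/share/mdadm/checkarray ] && [ $(date +\%d) -le 7 ] && /usr/share/mdadm/checkarray --cron --all --idle --quiet
+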
+Cron will try to run the check at "idle I/O priority" (see ionice(1)), so that
+the check does not overload the system too much. Note that this will only
+work if all the component devices of the array employ the (default) "cfq" I/O
+scheduler. See the kernel documentation[0] for information on how to verify
+and modify the scheduler. checkarray does not verify this for you.
+
+ 0. http://www.kernel.org/doc/Documentation/block/switching-sched.txt
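+
+For example, to inspect and, if necessary, switch the scheduler of one
+component device (the device name is illustrative):
+
+  cat /sys/block/sda/queue/scheduler
+  echo cfq > /sys/block/sda/queue/scheduler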
+
+If you manually invoke checkarray, it runs with default I/O priority. Should
+you need to run a check at a higher (or lower) I/O priority, then have a look
+at the --idle, --slow, --fast, and --realtime options.
+
+'check' is a read-only operation, even though the kernel logs may suggest
+otherwise (e.g. /proc/mdstat and several kernel messages will mention
+"resync"). Please also see question 21 of the FAQ.
+
+If, however, while reading, a read error occurs, the check will trigger the
+normal response to read errors which is to generate the 'correct' data and try
+to write that out - so it is possible that a 'check' will trigger a write.
+However in the absence of read errors it is read-only.
+
+You can cancel a running array check with the -x option to checkarray.
+
+ -- martin f. krafft <madduck@debian.org> Thu, 02 Sep 2010 10:27:29 +0200
diff --git a/debian/local/doc/README.recipes b/debian/local/doc/README.recipes
new file mode 100644
index 0000000..3906629
--- /dev/null
+++ b/debian/local/doc/README.recipes
@@ -0,0 +1,168 @@
+mdadm recipes
+=============
+
+The following examples/recipes may help you with your mdadm experience. I'll
+leave it as an exercise to use the correct device names and parameters in each
+case. You can find pointers to additional documentation in the README.Debian
+file.
+
+Enjoy. Submissions welcome.
+
+The latest version of this document is available here:
+ http://git.debian.org/?p=pkg-mdadm/mdadm.git;a=blob;f=debian/README.recipes;hb=HEAD
+
+The short options used here are:
+
+ -l Set RAID level.
+ -n Number of active devices in the array.
+ -x Specify the number of spare (eXtra) devices in the initial array.
+
+0. create a new array
+~~~~~~~~~~~~~~~~~~~~~
+ mdadm --create -l1 -n2 -x1 /dev/md0 /dev/sd[abc]1 # RAID 1, 1 spare
+ mdadm --create -l5 -n3 -x1 /dev/md0 /dev/sd[abcd]1 # RAID 5, 1 spare
+ mdadm --create -l6 -n4 -x1 /dev/md0 /dev/sd[abcde]1 # RAID 6, 1 spare
+
+1. create a degraded array
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --create -l5 -n3 /dev/md0 /dev/sda1 missing /dev/sdb1
+ mdadm --create -l6 -n4 /dev/md0 /dev/sda1 missing /dev/sdb1 missing
+
+2. assemble an existing array
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --assemble --auto=yes /dev/md0 /dev/sd[abc]1
+
+ # if the array is degraded, it won't be started. use --run:
+ mdadm --assemble --auto=yes --run /dev/md0 /dev/sd[ab]1
+
+ # or start it by hand:
+ mdadm --run /dev/md0
+
+3. assemble all arrays in /etc/mdadm/mdadm.conf
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --assemble --auto=yes --scan
+
+4. assemble a dirty degraded array
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --assemble --auto=yes --force /dev/md0 /dev/sd[ab]1
+ mdadm --run /dev/md0
+
+4b. assemble a dirty degraded array at boot-time
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ If the array is started at boot time by the kernel (partition type 0xfd),
+ you can force-assemble it by passing the kernel boot parameter
+
+ md-mod.start_dirty_degraded=1
+
+5. stop arrays
+~~~~~~~~~~~~~~
+ mdadm --stop /dev/md0
+
+ # to stop all arrays in /etc/mdadm/mdadm.conf
+ mdadm --stop --scan
+
+6. hot-add components
+~~~~~~~~~~~~~~~~~~~~~
+ # on the running array:
+ mdadm --add /dev/md0 /dev/sdc1
+
+ # if you add more components than the array was setup with, additional
+ # components will be spares
+
+7. hot-remove components
+~~~~~~~~~~~~~~~~~~~~~~~~
+ # on the running array:
+ mdadm --fail /dev/md0 /dev/sdb1
+
+ # if you have configured spares, watch /proc/mdstat how it fills in
+ mdadm --remove /dev/md0 /dev/sdb1
+
+8. hot-grow a RAID1 by adding new components
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # on the running array, in either order:
+ mdadm --grow -n3 /dev/md0
+ mdadm --add /dev/md0 /dev/sdc1
+
+ # note: without growing first, additional devices become spares and are
+ # *not* synchronised after the add.
+
+9. hot-shrink a RAID1 by removing components
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ mdadm --fail /dev/md0 /dev/sdc1
+ mdadm --remove /dev/md0 /dev/sdc1
+ mdadm --grow -n2 /dev/md0
+
+10. convert existing filesystem to RAID 1
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # The idea is to create a degraded RAID 1 on the second partition, move
+ # data, then hot add the first. This seems safer to me than simply to
+ # force-add a superblock to the existing filesystem.
+ #
+ # Assume /dev/sda1 holds the data (and let's assume it's mounted on
+ # /home) and /dev/sdb1 is empty and of the same size...
+ #
+ mdadm --create /dev/md0 -l1 -n2 /dev/sdb1 missing
+
+ mkfs -t <type> /dev/md0
+ mount /dev/md0 /mnt
+
+ tar -cf- -C /home . | tar -xf- -C /mnt -p
+
+ # consider verifying the data
+ umount /home
+ umount /mnt
+ mount /dev/md0 /home # also change /etc/fstab
+
+ mdadm --add /dev/md0 /dev/sda1
+
+ Warren Togami has a document explaining how to convert a filesystem on
+ a remote system via SSH: http://togami.com/~warren/guides/remoteraidcrazies/
+
+10b. convert existing filesystem to RAID 1 in-place
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ In-place conversion of /dev/sda1 to /dev/md0 is effectively
+
+ mdadm --create /dev/md0 -l1 -n2 /dev/sda1 missing
+
+ however, do NOT do this, as you risk filesystem corruption.
+
+ If you need to do this, first unmount and shrink the filesystem by
+ a megabyte (if supported). Then run the above command, then (optionally)
+ again grow the filesystem as much as possible.
+
+ Do make sure you have backups. If you do not yet, consider method (10)
+ instead (and make backups anyway!).
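+
+ A sketch of the full sequence for an ext3/ext4 filesystem (device names,
+ the second mirror device and the target size are illustrative; adjust to
+ your filesystem tools):
+
+   umount /dev/sda1
+   e2fsck -f /dev/sda1
+   resize2fs /dev/sda1 <slightly-smaller-size>
+   mdadm --create /dev/md0 -l1 -n2 /dev/sda1 missing
+   resize2fs /dev/md0                 # grow back to fill the array
+   mdadm --add /dev/md0 /dev/sdb1     # then add the real mirror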
+
+11. convert existing filesystem to RAID 5/6
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # See (10) for the basics.
+ mdadm --create /dev/md0 -l5 -n3 /dev/sdb1 /dev/sdc1 missing
+
+ #mdadm --create /dev/md0 -l6 -n4 /dev/sdb1 /dev/sdc1 /dev/sdd1 missing
+ mkfs -t <type> /dev/md0
+ mount /dev/md0 /mnt
+
+ tar -cf- -C /home . | tar -xf- -C /mnt -p
+
+ # consider verifying the data
+ umount /home
+ umount /mnt
+ mount /dev/md0 /home # also change /etc/fstab
+
+ mdadm --add /dev/md0 /dev/sda1
+
+12. change the preferred minor of an MD array (RAID)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ # you need to manually assemble the array to change the preferred minor.
+ # when you assemble manually, the superblock is updated to reflect the
+ # preferred minor implied by the device name you use during assembly.
+ # for example, to set the preferred minor to 4:
+ mdadm --assemble /dev/md4 /dev/sd[abc]1
+
+ # this only works on 2.6 kernels, and only for RAID levels of 1 and above.
+ # for other MD arrays, you need to specify --update explicitly:
+ mdadm --assemble --update=super-minor /dev/md4 /dev/sd[abc]1
+
+ # see also item 12 in the FAQ contained with the Debian package.
+
+ -- martin f. krafft <madduck@debian.org> Fri, 06 Oct 2006 15:39:58 +0200
diff --git a/debian/local/initramfs-tools/local-block/mdadm b/debian/local/initramfs-tools/local-block/mdadm
new file mode 100755
index 0000000..214f24c
--- /dev/null
+++ b/debian/local/initramfs-tools/local-block/mdadm
@@ -0,0 +1,61 @@
+#!/bin/sh
+
+PREREQ="multipath"
+
+prereqs()
+{
+ echo "$PREREQ"
+}
+
+case $1 in
+# get pre-requisites
+prereqs)
+ prereqs
+ exit 0
+ ;;
+esac
+
+. /scripts/functions
+
+# Poor man's mdadm-last-resort@.timer
+# That kicks in 2/3rds into the ROOTDELAY
+
+if [ ! -f /run/count.mdadm.initrd ]
+then
+ COUNT=0
+
+ # Unfortunately raid personalities can be registered _after_ block
+ # devices have already been added and their rules processed, so try
+ # triggering again. See #830770
+ udevadm trigger --action=add -s block || true
+ wait_for_udev 10
+else
+ COUNT=$(cat /run/count.mdadm.initrd)
+fi
+COUNT=$((COUNT + 1))
+
+echo $COUNT > /run/count.mdadm.initrd
+
+# Run a plain assemble command: even though we default to incremental
+# assembly, users may export variables such as IMSM_NO_PLATFORM via
+# param.conf. See #830300
+mdadm -q --assemble --scan --no-degraded || true
+
+MAX=30
+if [ ${ROOTDELAY:-0} -gt $MAX ]; then
+ MAX=$ROOTDELAY
+fi
+MAX=$((MAX*2/3))
+
+if [ "$COUNT" = "$MAX" ]
+then
+ # Poor man's mdadm-last-resort@.service for incremental devices
+ mdadm -q --run /dev/md?*
+
+ # And last try for all others
+ mdadm -q --assemble --scan --run
+
+ rm -f /run/count.mdadm.initrd
+fi
+
+exit 0
diff --git a/debian/local/initramfs-tools/local-bottom/mdadm b/debian/local/initramfs-tools/local-bottom/mdadm
new file mode 100755
index 0000000..eda3b17
--- /dev/null
+++ b/debian/local/initramfs-tools/local-bottom/mdadm
@@ -0,0 +1,3 @@
+#!/bin/sh
+rm -f /run/count.mdadm.initrd
+exit 0 \ No newline at end of file
diff --git a/debian/local/reportbug/script b/debian/local/reportbug/script
new file mode 100755
index 0000000..dcb88eb
--- /dev/null
+++ b/debian/local/reportbug/script
@@ -0,0 +1,219 @@
+#!/bin/bash
+#
+# mdadm bug submission control script
+#
+# allows Debian's bug tools to include relevant information in bug reports.
+#
+# Copyright © martin f. krafft <madduck@debian.org>
+# distributed under the terms of the Artistic Licence 2.0
+#
+# we need /bin/bash for readline and -n capabilities in the prompt(s)
+#
+
+# maximise information output even in the case of errors
+set +eu
+
+if ! command -v yesno >/dev/null; then
+ if [ -r /usr/share/reportbug/handle_bugscript ]; then
+ exec /usr/share/reportbug/handle_bugscript ". $0" /dev/stdout
+ fi
+ yesno() {
+ read -n1 -p"$1" REPLY
+ case "$REPLY" in
+ [yY]) REPLY=yep;;
+ [nN]) REPLY=nop;;
+ ('') REPLY="$2";;
+ esac
+ }
+ exec 3>&1
+fi
+
+# do not let people ctrl-c out of the bugscript
+trap : INT
+
+if [ $(id -u) != 0 ]; then
+ if [ -x "$(command -v sudo)" ]; then
+ yesno "Gather system information as root using sudo? (Y/n) " yep
+ if [ "$REPLY" = yep ]; then
+ echo running sudo "$0" "$@"...
+ sudo "$0" "$@" >&3 && exit 0
+ echo "sudo invocation failed, trying /bin/su..."
+ fi
+ fi
+
+ yesno "Gather system information as root using su? (Y/n) " yep
+ if [ "$REPLY" = yep ]; then
+ ARGS=
+ for i in "$@"; do ARGS="${ARGS:+$ARGS }'$1'"; shift; done
+ echo "running su root -s '/bin/sh -c $0${ARGS:+ $ARGS}'..."
+ su root -s /bin/sh -c "$0 $ARGS" >&3 && exit 0
+ unset ARGS
+ echo "su invocation failed."
+ fi
+
+ # arrive here only if neither sudo nor su worked:
+ yesno "Will you provide system information in the bug report yourself? (N/y) " nop
+ if [ "$REPLY" = yep ]; then
+ cat <<_eof >&3
+
+IMPORTANT:
+ please do not forget to include all relevant system information with this
+ bug report. You could run
+ /usr/share/bug/mdadm/script 3>&1
+ as root and attach or include the output.
+
+_eof
+ exit 0
+ fi
+
+ # try our best
+ cat <<_eof >&3
+
+WARNING:
+ the following output was not generated by the root user. If you can, please
+ replace the following up until "-- System Information:" with the output of
+ /usr/share/bug/mdadm/script 3>&1
+ run as root. Thanks!
+
+_eof
+fi
+
+if [ ! -r /proc/mdstat ]; then
+ echo "The local system does not have MD (RAID) support: no drivers loaded."
+ echo "Without MD support, I cannot collect as much information as I'd like."
+
+ #yesno "Are you sure you want to report a bug at this time? " yep
+ yesno "Hit any key to continue..." yep
+ #[ "$REPLY" = yep ] || exit 1
+fi
+
+echo "--- mdadm.conf" >&3
+if [ -r /etc/mdadm/mdadm.conf ]; then
+ grep '^[^#]' /etc/mdadm/mdadm.conf >&3
+elif [ -r /etc/mdadm.conf ]; then
+ grep '^[^#]' /etc/mdadm.conf >&3
+else
+ echo no mdadm.conf file. >&3
+fi
+echo >&3
+
+echo "--- /etc/default/mdadm" >&3
+if [ -r /etc/default/mdadm ]; then
+ grep '^[^#]' /etc/default/mdadm >&3
+else
+ echo no /etc/default/mdadm file. >&3
+fi
+echo >&3
+
+echo "--- /proc/mdstat:" >&3
+cat /proc/mdstat >&3 2>&3 || :
+echo >&3
+
+echo "--- /proc/partitions:" >&3
+cat /proc/partitions >&3 2>&3 || :
+echo >&3
+
+echo "--- LVM physical volumes:" >&3
+if [ -x "$(command -v pvs)" ]; then
+ pvs >&3
+else
+ echo "LVM does not seem to be used." >&3
+fi
+
+echo "--- mount output" >&3
+mount >&3
+echo >&3
+
+echo "--- initrd.img-$(uname -r):" >&3
+if [ -r /boot/initrd.img-$(uname -r) ]; then
+ TEMPDIR=$(mktemp -d)
+ OLDPWD="$PWD"
+ cd "$TEMPDIR"
+ zcat /boot/initrd.img-$(uname -r) 2>&3 | cpio -i 2>&3
+ find -regex '.*/md[a/].+' -type f -exec md5sum {} \; >&3
+
+ echo >&3
+ echo "--- initrd's /conf/conf.d/md:" >&3
+ if [ -r conf/conf.d/md ]; then
+ cat conf/conf.d/md >&3
+ else
+ echo "no conf/md file." >&3
+ fi
+
+ cd "$OLDPWD"
+ rm -rf "$TEMPDIR"
+ unset TEMPDIR
+else
+ echo "no initrd.img-$(uname -r) found." >&3
+fi
+echo >&3
+
+if [ -r /proc/modules ]; then
+ echo "--- /proc/modules:" >&3
+ egrep '(dm_|raid|linear|multipath|faulty)' < /proc/modules >&3 || :
+ echo >&3
+fi
+
+if [ -f /var/log/syslog ]; then
+ if [ -r /var/log/syslog ]; then
+ echo "--- /var/log/syslog:" >&3
+ egrep "^\w{3} [ :[:digit:]]{11} ($(hostname)|localhost) (kernel: md|mdadm): " /var/log/syslog >&3 || :
+ echo >&3
+ else
+ echo "syslog not readable by user." >&3
+ fi
+fi
+
+echo "--- volume detail:" >&3
+for dev in /dev/[hsv]d[a-z]*; do
+ [ ! -r $dev ] && echo "$dev not readable by user." && continue
+ mdadm -E $dev 2>/dev/null && echo -- || echo "$dev is not recognised by mdadm."
+done >&3
+echo >&3
+
+if [ -r /proc/cmdline ]; then
+ echo "--- /proc/cmdline" >&3
+ cat /proc/cmdline >&3
+ echo >&3
+fi
+
+if [ -f /boot/grub/grub.cfg ]; then
+ echo "--- grub2:" >&3
+ if [ -r /boot/grub/grub.cfg ]; then
+ egrep '^[^#].*\<(root=|raid)' /boot/grub/grub.cfg >&3 || :
+ else
+ echo grub.cfg file not readable. >&3
+ fi
+ echo >&3
+fi
+
+if [ -f /boot/grub/menu.lst ]; then
+ echo "--- grub legacy:" >&3
+ if [ -r /boot/grub/menu.lst ]; then
+ grep '^[^#].*\<root=' /boot/grub/menu.lst >&3 || :
+ else
+ echo menu.lst file not readable. >&3
+ fi
+ echo >&3
+fi
+
+if [ -f /etc/lilo.conf ]; then
+ echo "--- lilo:" >&3
+ if [ -r /etc/lilo.conf ]; then
+ egrep '^([^#].*)?root=' /etc/lilo.conf >&3 || :
+ else
+ echo lilo.conf file not readable. >&3
+ fi
+ echo >&3
+fi
+
+echo "--- udev:" >&3
+COLUMNS=70 dpkg -l udev | grep '\<udev\>' >&3
+md5sum /etc/udev/rules.d/*md* /lib/udev/rules.d/*md* >&3 2>/dev/null
+echo >&3
+
+echo "--- /dev:" >&3
+ls -l /dev/md* /dev/disk/by-* >&3
+echo >&3
+
+echo "Auto-generated on $(date -R) by mdadm bugscript" >&3