From 6088e9df34ffdc4dea71dde1025d5500c045db3e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 05:20:41 +0200 Subject: Merging upstream version 4.3+20240412. Signed-off-by: Daniel Baumann --- ANNOUNCE-3.0 | 98 ---- ANNOUNCE-3.0.1 | 22 - ANNOUNCE-3.0.2 | 21 - ANNOUNCE-3.0.3 | 29 -- ANNOUNCE-3.1 | 33 -- ANNOUNCE-3.1.1 | 39 -- ANNOUNCE-3.1.2 | 46 -- ANNOUNCE-3.1.3 | 46 -- ANNOUNCE-3.1.4 | 37 -- ANNOUNCE-3.1.5 | 42 -- ANNOUNCE-3.2 | 77 ---- ANNOUNCE-3.2.1 | 75 ---- ANNOUNCE-3.2.2 | 36 -- ANNOUNCE-3.2.3 | 24 - ANNOUNCE-3.2.4 | 144 ------ ANNOUNCE-3.2.5 | 31 -- ANNOUNCE-3.2.6 | 57 --- ANNOUNCE-3.3 | 63 --- ANNOUNCE-3.3.1 | 23 - ANNOUNCE-3.3.2 | 16 - ANNOUNCE-3.3.3 | 18 - ANNOUNCE-3.3.4 | 37 -- ANNOUNCE-3.4 | 24 - ANNOUNCE-4.0 | 22 - ANNOUNCE-4.1 | 16 - ANNOUNCE-4.2 | 19 - Assemble.c | 2 +- Build.c | 6 - CHANGELOG.md | 368 +++++++++++++++ ChangeLog | 306 ------------- Create.c | 58 ++- Detail.c | 57 ++- Grow.c | 32 +- Incremental.c | 78 +++- MAINTAINERS.md | 44 ++ Makefile | 4 +- Manage.c | 195 ++++---- Monitor.c | 66 +-- README.initramfs | 122 ----- README.md | 83 ++++ TODO | 213 --------- config.c | 25 +- documentation/external-reshape-design.txt | 280 ++++++++++++ documentation/mdadm.conf-example | 65 +++ documentation/mdmon-design.txt | 146 ++++++ drive_encryption.c | 724 ++++++++++++++++++++++++++++++ drive_encryption.h | 37 ++ external-reshape-design.txt | 280 ------------ inventory | 284 ------------ makedist | 96 ---- mdadm.8.in | 21 +- mdadm.conf-example | 65 --- mdadm.conf.5.in | 16 + mdadm.h | 104 +++-- mdadm.spec | 47 -- mdmon-design.txt | 146 ------ mdmon.c | 21 +- mkinitramfs | 55 --- monitor.c | 65 ++- platform-intel.h | 1 - policy.c | 110 ++++- super-ddf.c | 11 +- super-intel.c | 403 +++++++++++++---- super0.c | 2 + super1.c | 16 +- sysfs.c | 58 ++- test | 27 +- tests/func.sh | 1 - udev.c | 3 + util.c | 144 +++--- 70 files changed, 2741 insertions(+), 3241 deletions(-) delete mode 100644 ANNOUNCE-3.0 delete mode 100644 ANNOUNCE-3.0.1 
delete mode 100644 ANNOUNCE-3.0.2 delete mode 100644 ANNOUNCE-3.0.3 delete mode 100644 ANNOUNCE-3.1 delete mode 100644 ANNOUNCE-3.1.1 delete mode 100644 ANNOUNCE-3.1.2 delete mode 100644 ANNOUNCE-3.1.3 delete mode 100644 ANNOUNCE-3.1.4 delete mode 100644 ANNOUNCE-3.1.5 delete mode 100644 ANNOUNCE-3.2 delete mode 100644 ANNOUNCE-3.2.1 delete mode 100644 ANNOUNCE-3.2.2 delete mode 100644 ANNOUNCE-3.2.3 delete mode 100644 ANNOUNCE-3.2.4 delete mode 100644 ANNOUNCE-3.2.5 delete mode 100644 ANNOUNCE-3.2.6 delete mode 100644 ANNOUNCE-3.3 delete mode 100644 ANNOUNCE-3.3.1 delete mode 100644 ANNOUNCE-3.3.2 delete mode 100644 ANNOUNCE-3.3.3 delete mode 100644 ANNOUNCE-3.3.4 delete mode 100644 ANNOUNCE-3.4 delete mode 100644 ANNOUNCE-4.0 delete mode 100644 ANNOUNCE-4.1 delete mode 100644 ANNOUNCE-4.2 create mode 100644 CHANGELOG.md delete mode 100644 ChangeLog create mode 100644 MAINTAINERS.md delete mode 100644 README.initramfs create mode 100644 README.md delete mode 100644 TODO create mode 100644 documentation/external-reshape-design.txt create mode 100644 documentation/mdadm.conf-example create mode 100644 documentation/mdmon-design.txt create mode 100644 drive_encryption.c create mode 100644 drive_encryption.h delete mode 100644 external-reshape-design.txt delete mode 100755 inventory delete mode 100755 makedist delete mode 100644 mdadm.conf-example delete mode 100644 mdadm.spec delete mode 100644 mdmon-design.txt delete mode 100644 mkinitramfs diff --git a/ANNOUNCE-3.0 b/ANNOUNCE-3.0 deleted file mode 100644 index f2d4f84..0000000 --- a/ANNOUNCE-3.0 +++ /dev/null @@ -1,98 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.0 - A tool for managing Soft RAID under Linux - -I am pleased to (finally) announce the availability of - mdadm version 3.0 - -It is available at the usual places: - countrycode=xx. 
- http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git?p=mdadm - - -This is a major new version and as such should be treated with some -caution. However it has seen substantial testing and is considerred -to be ready for wide use. - - -The significant change which justifies the new major version number is -that mdadm can now handle metadata updates entirely in userspace. -This allows mdadm to support metadata formats that the kernel knows -nothing about. - -Currently two such metadata formats are supported: - - DDF - The SNIA standard format - - Intel Matrix - The metadata used by recent Intel ICH controlers. - -Also the approach to device names has changed significantly. - -If udev is installed on the system, mdadm will not create any devices -in /dev. Rather it allows udev to manage those devices. For this to work -as expected, the included udev rules file should be installed. - -If udev is not installed, mdadm will still create devices and symlinks -as required, and will also remove them when the array is stopped. - -mdadm now requires all devices which do not have a standard name (mdX -or md_dX) to live in the directory /dev/md/. Names in this directory -will always be created as symlinks back to the standard name in /dev. - -The man pages contain some information about the new externally managed -metadata. However see below for a more condensed overview. - -Externally managed metadata introduces the concept of a 'container'. -A container is a collection of (normally) physical devices which have -a common set of metadata. A container is assembled as an md array, but -is left 'inactive'. - -A container can contain one or more data arrays. These are composed from -slices (partitions?) of various devices in the container. 
- -For example, a 5 devices DDF set can container a RAID1 using the first -half of two devices, a RAID0 using the first half of the remain 3 devices, -and a RAID5 over thte second half of all 5 devices. - -A container can be created with - - mdadm --create /dev/md0 -e ddf -n5 /dev/sd[abcde] - -or "-e imsm" to use the Intel Matrix Storage Manager. - -An array can be created within a container either by giving the -container name and the only member: - - mdadm -C /dev/md1 --level raid1 -n 2 /dev/md0 - -or by listing the component devices - - mdadm -C /dev/md2 --level raid0 -n 3 /dev/sd[cde] - -To assemble a container, it is easiest just to pass each device in turn to -mdadm -I - - for i in /dev/sd[abcde] - do mdadm -I $i - done - -This will assemble the container and the components. - -Alternately the container can be assembled explicitly - - mdadm -A /dev/md0 /dev/sd[abcde] - -Then the components can all be assembled with - - mdadm -I /dev/md0 - -For each container, mdadm will start a program called "mdmon" which will -monitor the array and effect any metadata updates needed. The array is -initially assembled readonly. It is up to "mdmon" to mark the metadata -as 'dirty' and which the array to 'read-write'. - -The version 0.90 and 1.x metadata formats supported by previous -versions for mdadm are still supported and the kernel still performs -the same updates it use to. The new 'mdmon' approach is only used for -newly introduced metadata types. - -NeilBrown 2nd June 2009 diff --git a/ANNOUNCE-3.0.1 b/ANNOUNCE-3.0.1 deleted file mode 100644 index 91b4428..0000000 --- a/ANNOUNCE-3.0.1 +++ /dev/null @@ -1,22 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.0.1 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.0.1 - -It is available at the usual places: - countrycode=xx. 
- http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git?p=mdadm - - -This contains only minor bug fixes over 3.0. If you are using -3.0, you could consider upgrading. - -The brief change log is: - - Fix various segfaults - - Fixed for --examine with containers - - Lots of other little fixes. - -NeilBrown 25th September 2009 diff --git a/ANNOUNCE-3.0.2 b/ANNOUNCE-3.0.2 deleted file mode 100644 index 93643d1..0000000 --- a/ANNOUNCE-3.0.2 +++ /dev/null @@ -1,21 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.0.2 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.0.2 - -It is available at the usual places: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git?p=mdadm - - -This just contains one bugfix over 3.0.1 - I was obviously a bit hasty -in releasing that one. - -The brief change log is: - - Fix crash when hosthost is not set, as often happens in - early boot. - -NeilBrown 25th September 2009 diff --git a/ANNOUNCE-3.0.3 b/ANNOUNCE-3.0.3 deleted file mode 100644 index d6117a1..0000000 --- a/ANNOUNCE-3.0.3 +++ /dev/null @@ -1,29 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.0.3 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.0.3 - -It is available at the usual places: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git?p=mdadm - - -This contains a collection of bug fixes and minor enhancements over -3.0.1. - -The brief change log is: - - Improvements for creating arrays giving just a name, like 'foo', - rather than the full '/dev/md/foo'. - - Improvements for assembling member arrays of containers. 
- - Improvements to test suite - - Add option to change increment for RebuildNN messages reported - by "mdadm --monitor" - - Improvements to mdmon 'hand-over' from initrd to final root. - - Handle merging of devices that have left an IMSM array and are - being re-incorporated. - - Add missing space in "--detail --brief" output. - -NeilBrown 22nd October 2009 diff --git a/ANNOUNCE-3.1 b/ANNOUNCE-3.1 deleted file mode 100644 index 343b85d..0000000 --- a/ANNOUNCE-3.1 +++ /dev/null @@ -1,33 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.1 - A tool for managing Soft RAID under Linux - -Hot on the heals of 3.0.3 I am pleased to announce the availability of - mdadm version 3.1 - -It is available at the usual places: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git?p=mdadm - - -It contains significant feature enhancements over 3.0.x - -The brief change log is: - - Support --grow to change the layout of RAID4/5/6 - - Support --grow to change the chunksize of raid 4/5/6 - - Support --grow to change level from RAID1 -> RAID5 -> RAID6 and - back. - - Support --grow to reduce the number of devices in RAID4/5/6. - - Support restart of these grow options which assembling an array - which is partially grown. - - Assorted tests of this code, and of different RAID6 layouts. - -Note that a 2.6.31 or later is needed to have access to these. -Reducing devices in a RAID4/5/6 requires 2.6.32. -Changing RAID5 to RAID1 requires 2.6.33. - -You should only upgrade if you need to use, or which to test, these -features. - -NeilBrown 22nd October 2009 diff --git a/ANNOUNCE-3.1.1 b/ANNOUNCE-3.1.1 deleted file mode 100644 index 9e480dc..0000000 --- a/ANNOUNCE-3.1.1 +++ /dev/null @@ -1,39 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.1.1 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.1.1 - -It is available at the usual places: - countrycode=xx. 
- http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git?p=mdadm - -This is a bugfix release over 3.1, which was withdrawn due to serious -bugs. So it might be best to ignore 3.1 and say that this is a significant -feature release over 3.0.x - -Significant changes are: - - RAID level conversion between RAID1, RAID5, and RAID6 are - possible were the kernel supports it (2.6.32 at least) - - online chunksize and layout changing for RAID5 and RAID6 - where the kernel supports it. - - reduce the number of devices in a RAID4/5/6 array. - - - The default metadata is not v1.1. This metadata is stored at the - start of the device so is safer in many ways but could interfere with - boot loaded. The old default (0.90) is still available and fully - supported. - - - The default chunksize is now 512K rather than 64K. This seems more - appropriate for modern devices. - - - The default bitmap chunksize for internal bitmaps is now at least - 64Meg as fine grained bitmaps tend to impact performance more for - little extra gain. - -This release is believed to be stable and you should feel free to -upgrade to 3.1.1. - -NeilBrown 19th November 2009 diff --git a/ANNOUNCE-3.1.2 b/ANNOUNCE-3.1.2 deleted file mode 100644 index 321b8be..0000000 --- a/ANNOUNCE-3.1.2 +++ /dev/null @@ -1,46 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.1.2 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.1.2 - -It is available at the usual places: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git?p=mdadm - -This is a bugfix/stability release over 3.1.1. - -Significant changes are: - - The default metadata has change again (sorry about that). - It is now v1.2 and will hopefully stay that way. 
It turned - out there with boot-block issues with v1.1 which make it - unsuitable for a default, though in many cases it is still - suitable to use. - - Stopping a container is not permitted when members are still - active - - Add 'homehost' to the valid words for the "AUTO" config file - line. When followed by "-all", this causes mdadm to - auto-assemble any array belonging to this host, but not - auto-assemble anything else. - - Fix some bugs with "--grow --chunksize=" for changing chunksize. - - VAR_RUN can be easily changed at compile time just like ALT_RUN. - This gives distros more flexability in how to manage the - pid and sock files that mdmon needs. - - Various mdmon fixes - - Alway make bitmap 4K-aligned if at all possible. - - If mdadm.conf lists arrays which have inter-dependencies, - the previously had to be listed in the "right" order. Now - any order should work. - - Fix --force assembly of v1.x arrays which are in the process - of recovering. - - Add section on 'scrubbing' to 'md' man page. - - Various command-line-option parsing improvements. - - ... and lots of other bug fixes. - - -This release is believed to be stable and you should feel free to -upgrade to 3.1.2 - -NeilBrown 10th March 2010 diff --git a/ANNOUNCE-3.1.3 b/ANNOUNCE-3.1.3 deleted file mode 100644 index 95b2b6c..0000000 --- a/ANNOUNCE-3.1.3 +++ /dev/null @@ -1,46 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.1.3 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.1.3 - -It is available at the usual places: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git?p=mdadm - -This is a bugfix/stability release over 3.1.2 - -Significant changes are: - - mapfile now lives in a fixed location which default to - /dev/.mdadm/map but can be changed at compile time. 
This - location is choses and most distros provide it during early - boot and preserve it through. As long a /dev exists and is - writable, /dev/.mdadm will be created. - Other files file communication with mdmon live here too. - This fixes a bug reported by Debian and Gentoo users where - udev would spin in early-boot. - - IMSM and DDF metadata will not be recognised on partitions - as they should only be used on whole-disks. - - Various overflows causes by 2G drives have been addressed. - - A subarray of an IMSM contain can now be killed with - --kill-subarray. Also subarrays can be renamed with - --update-subarray - - -If (or --incremental --fail) can be used from udev to - fail and remove from all arrays a device which has been - unplugged from the system. i.e. hot-unplug-support. - - "mdadm /dev/mdX --re-add missing" will look for any device - that looks like it should be a member of /dev/mdX but isn't - and will automatically --re-add it - - Now compile with -Wextra to get extra warnings. - - Lots of minor bug fixes, documentation improvements, etcc - -This release is believed to be stable and you should feel free to -upgrade to 3.1.3 - -It is expected that the next release will be 3.2 with a number of new -features. 3.1.4 will only happen if important bugs show up before 3.2 -is stable. - -NeilBrown 6th August 2010 diff --git a/ANNOUNCE-3.1.4 b/ANNOUNCE-3.1.4 deleted file mode 100644 index c157a36..0000000 --- a/ANNOUNCE-3.1.4 +++ /dev/null @@ -1,37 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.1.4 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.1.4 - -It is available at the usual places: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git?p=mdadm - -This is a bugfix/stability release over 3.1.3. 
-3.1.3 had a couple of embarrasing regressions and a couple of other -issues surfaces which had easy fixes so I decided to make a 3.1.4 -release after all. - -Two fixes related to configs that aren't using udev: - - Don't remove md devices which 'standard' names on --stop - - Allow dev_open to work on read-only /dev -And fixed regressions: - - Allow --incremental to add spares to an array - - Accept --no-degraded as a deprecated option rather than - throwing an error - - Return correct success status when --incrmental assembling - a container which does not yet have enough devices. - - Don't link mdadm with pthreads, only mdmon needs it. - - Fix compiler warning due to bad use of snprintf - - Fix spare migration - -This release is believed to be stable and you should feel free to -upgrade to 3.1.4 - -It is expected that the next release will be 3.2 with a number of new -features. - -NeilBrown 31st August 2010 diff --git a/ANNOUNCE-3.1.5 b/ANNOUNCE-3.1.5 deleted file mode 100644 index baa1f92..0000000 --- a/ANNOUNCE-3.1.5 +++ /dev/null @@ -1,42 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.1.5 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.1.5 - -It is available at the usual places: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git?p=mdadm - -This is a bugfix/stability release over 3.1.4. It contains all the -important bugfixes found while working on 3.2 and 3.2.1. It will be -the last 3.1.x release - 3.2.1 is expected to be released in a few days. - -Changes include: - - Fixes for v1.x metadata on big-endian machines. - - man page improvements - - Improve '--detail --export' when run on partitions of an md array. - - Fix regression with removing 'failed' or 'detached' devices. - - Fixes for "--assemble --force" in various unusual cases. - - Allow '-Y' to mean --export. 
This was documented but not implemented. - - Various fixed for handling 'ddf' metadata. This is now more reliable - but could benefit from more interoperability testing. - - Correctly list subarrays of a container in "--detail" output. - - Improve checks on whether the requested number of devices is supported - by the metadata - both for --create and --grow. - - Don't remove partitions from a device that is being included in an - array until we are fully committed to including it. - - Allow "--assemble --update=no-bitmap" so an array with a corrupt - bitmap can still be assembled. - - Don't allow --add to succeed if it looks like a "--re-add" is probably - wanted, but cannot succeed. This avoids inadvertently turning - devices into spares when an array is failed. - -This release is believed to be stable and you should feel free to -upgrade to 3.1.5 - - -NeilBrown 23rd March 2011 - diff --git a/ANNOUNCE-3.2 b/ANNOUNCE-3.2 deleted file mode 100644 index 9e282bc..0000000 --- a/ANNOUNCE-3.2 +++ /dev/null @@ -1,77 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.2 - A tool for managing Soft RAID under Linux (DEVEL ONLY) - -I am pleased to announce the availability of - mdadm version 3.2 - -It is available at the usual places: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm devel-3.2 - http://neil.brown.name/git?p=mdadm - -This is a "Developers only" release. Please don't consider using it -or making it available to others without reading the following. - - -By far the most significant change in this release related to the -management of reshaping arrays. This code has been substantially -re-written so that it can work with 'externally managed metadata' - -Intel's IMSM in particular. We now support level migration and -OnLine Capacity Expansion on these arrays. - -However, while the code largely works it has not been tested -exhaustively so there are likely to be problems. 
As the reshape code -for native metadata arrays was changed as part of this rewrite these -problems could also result in regressions for reshape of native -metadata. - -It is partly to encourage greater testing that this release is being -made. Any reports of problem - particular reproducible recipes for -triggering the problems - will be gratefully received. - -It is hopped that a "3.2.1" release will be available in early March -which will be a bugfix release over this and can be considered -suitable for general use. - -Other changes of note: - - - Policy framework. - Various policy statements can be made in the mdadm.conf to guide - the behaviour of mdadm, particular with regards to how new devices - are treated by "mdadm -I". - Depending on the 'action' associated with a device (identified by - its 'path') such need devices can be automatically re-added to and - existing array that they previously fell out off, or automatically - added as a spare if they appear to contain no data. - - - mdadm now has a limited understanding of partition tables. This - allows the policy framework to make decisions about partitioned - devices as well. - - - --incremental --remove can be told what --path the device was on, - and this info will be recorded so that another device appearing at - the same physical location can be preferentially added to the same - array (provides the spare-same-slot action policy applied to the - path). - - - A new flags "--invalid-backup" flag is available in --assemble - mode. This can be used to re-assemble an array which was stopping - in the middle of a reshape, and for which the 'backup file' is no - longer available or is corrupted. The array may have some - corruption in it at the point where reshape was up to, but at least - the rest of the array will become available. - - - - Various internal restructuring - more is needed. 
- - -Any feed back and bug reports are always welcomed at: - linux-raid@vger.kernel.org - -And please: don't use this in production - particularly not the ---grow functionality. - -NeilBrown 1st February 2011 - - diff --git a/ANNOUNCE-3.2.1 b/ANNOUNCE-3.2.1 deleted file mode 100644 index 0e7826c..0000000 --- a/ANNOUNCE-3.2.1 +++ /dev/null @@ -1,75 +0,0 @@ - - -I am pleased to announce the availability of - mdadm version 3.2.1 - -It is available at the usual places: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git/mdadm - -Many of the changes in this release are of internal interest only, -restructuring and refactoring code and so forth. - -Most of the bugs found and fixed during development for 3.2.1 have been -back-ported for the recently-release 3.1.5 so this release primarily -provides a few new features over 3.1.5. - -They include: - - policy framework - Policy can be expressed for moving spare devices between arrays, and - for how to handle hot-plugged devices. This policy can be different - for devices plugged in to different controllers etc. - This, for example, allows a configuration where when a device is plugged - in it is immediately included in an md array as a hot spare and - possibly starts recovery immediately if an array is degraded. - - - some understanding of mbr and gpt paritition tables - This is primarly to support the new hot-plug support. If a - device is plugged in and policy suggests it should have a partition table, - the partition table will be copied from a suitably similar device, and - then the partitions will hot-plug and can then be added to md arrays. - - - "--incremental --remove" can remember where a device was removed from - so if a device gets plugged back in the same place, special policy applies - to it, allowing it to be included in an array even if a general hotplug - will not be included. 
- - - enhanced reshape options, including growing a RAID0 by converting to RAID4, - restriping, and converting back. Also convertions between RAID0 and - RAID10 and between RAID1 and RAID10 are possible (with a suitably recent - kernel). - - - spare migration for IMSM arrays. - Spare migration can now work across 'containers' using non-native metadata - and specifically Intel's IMSM arrays support spare migrations. - - - OLCE and level migration for Intel IMSM arrays. - OnLine Capacity Expansion and level migration (e.g. RAID0 -> RAID5) is - supported for Intel Matrix Storage Manager arrays. - This support is currently 'experimental' for technical reasons. It can - be enabled with "export MDADM_EXPERIMENTAL=1" - - - avoid including wayward devices - If you split a RAID1, mount the two halves as two separate degraded RAID1s, - and then later bring the two back together, it is possible that the md - metadata won't properly show that one must over-ride the other. - mdadm now does extra checking to detect this possibilty and avoid - potentially corrupting data. - - - remove any possible confusion between similar options. - e.g. --brief and --bitmap were mapped to 'b' and mdadm wouldn't - notice if one was used where the other was expected. - - - allow K,M,G suffixes on chunk sizes - - -While mdadm-3.2.1 is considered to be reasonably stable, you should -only use it if you want to try out the new features, or if you -generally like to be on the bleeding edge. If the new features are not -important to you, then 3.1.5 is probably the appropriate version to be using -until 3.2.2 comes out. - -NeilBrown 28th March 2011 diff --git a/ANNOUNCE-3.2.2 b/ANNOUNCE-3.2.2 deleted file mode 100644 index b70d18b..0000000 --- a/ANNOUNCE-3.2.2 +++ /dev/null @@ -1,36 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.2.2 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.2.2 - -It is available at the usual places: - countrycode=xx. 
- http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git/mdadm - -This release is largely a stablising release for the 3.2 series. -Many of the changes just fix bugs introduces in 3.2 or 3.2.1. - -There are some new features. They are: - - reshaping IMSM (Intel metadata) arrays is no longer 'experimental', - it should work properly and be largely compatible with IMSM drivers in - other platforms. - - --assume-clean can be used with --grow --size to avoid resyncing the - new part of the array. This is only support with very new kernels. - - RAID0 arrays can have chunksize which is not a power of 2. This has been - supported in the kernel for a while but is only now supprted by - mdadm. - - - A new tool 'raid6check' is available which can check a RAID6 array, - or part of it, and report which device is most inconsistent with the - others if any stripe is inconsistent. This is still under development - and does not have a man page yet. If anyone tries it out and has any - questions or experience to report, they would be most welcome on - linux-raid@vger.kernel.org. - -Future releases in the 3.2 series will only be made if bugfixes are needed. -The next release to add features is expected to be 3.3. - -NeilBrown 17th June 2011 diff --git a/ANNOUNCE-3.2.3 b/ANNOUNCE-3.2.3 deleted file mode 100644 index 8a8dba4..0000000 --- a/ANNOUNCE-3.2.3 +++ /dev/null @@ -1,24 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.2.3 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.2.3 - -It is available at the usual places: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://neil.brown.name/mdadm - http://neil.brown.name/git/mdadm - -This release is largely a bugfix release for the 3.2 series with many -minor fixes with little or no impact. 
- -The largest single area of change is support for reshape of Intel -IMSM arrays (OnLine Capacity Explansion and Level Migtration). -Among other fixes, this now has a better chance of surviving if a -device fails during reshape. - -Upgrading is recommended - particularly if you use mdadm for IMSM -arrays - but not essential. - -NeilBrown 23rd December 2011 diff --git a/ANNOUNCE-3.2.4 b/ANNOUNCE-3.2.4 deleted file mode 100644 index e321678..0000000 --- a/ANNOUNCE-3.2.4 +++ /dev/null @@ -1,144 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.2.4 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.2.4 - -It is available at the usual places, now including github: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://github.com/neilbrown/mdadm - git://neil.brown.name/mdadm - http://neil.brown.name/git/mdadm - -This release is largely a bugfix release for the 3.2 series with many -minor fixes with little or no impact. - -"--oneline" log of changes is below. Some notable ones are: - - - --offroot argument to improve interactions between mdmon and initrd - - --prefer argument to select which /dev names to display in some - circumstances. - - relax restructions on when "--add" will be allowed - - Fix bug with adding write-intent-bitmap to active array - - Now defaults to "/run/mdadm" for storing run-time files. - -Upgrading is encouraged. - -The next mdadm release is expected to be 3.3 with a number of new -features. - -NeilBrown 9th May 2012 - -77b3ac8 monitor: make return from read_and_act more symbolic. -68226a8 monitor: ensure we retry soon when 'remove' fails. -8453f8d fix: Monitor sometimes crashes -90fa1a2 Work around gcc-4.7's strict aliasing checks -0c4304c fix: container creation with --incremental used. 
-5d1c7cd FIX: External metadata sometimes is not updated -3c20f98 FIX: mdmon check in reshape_container() can cause a problem -59ab9f5 FIX: Typo error in fprint command -9587c37 imsm: load_super_imsm_all function refactoring -ec50f7b imsm: load_imsm_super_all supports loading metadata from the device list -ca9de18 imsm: validate the number of imsm volumes per controller -30602f5 imsm: display fd in error trace when when store_imsm_mpb failes -eb155f6 mdmon: Use getopt_long() to parse command line options -08ca2ad Add --offroot argument to mdadm -da82751 Add --offroot argument to mdmon -a0963a8 Spawn mdmon with --offroot if mdadm was launched with --offroot -f878b24 imsm: fix, the second array need to have the whole available space on devices -d597705 getinfo_super1: Use MaxSector in place of sb->size -6ef8905 super1: make aread/awrite always use an aligned buffer. -de5a472 Remove avail_disks arg from 'enough'. -da8fe5a Assemble: fix --force assemble during reshape. -b10c663 config: fix handing of 'homehost' in AUTO line. -92d49ec FIX: NULL pointer to strdup() can be passed -d2bde6d imsm: FIX: No new missing disks are allowed during general migration -111e9fd FIX: Array is not run when expansion disks are added -bf5cf7c imsm: FIX: imsm_get_allowed_degradation() doesn't count degradation for raid1 -50927b1 Fix: Sometimes mdmon throws core dump during reshape -78340e2 Flush mdmon before next reshape step during container operation -e174219 imsm: FIX: Chunk size migration problem -f93346e FIX: use md position to reshape restart -6a75c8c imsm: FIX: use md position to reshape restart -51d83f5 imsm: FIX: Clear migration record when migration switches to next volume. -e1dd332 FIX: restart reshape when reshape process is stopped just between 2 reshapes -1ca90aa FIX: Do not try to (continue) reshape using inactive array -9f1b0f0 config: conf_match should ignore devname when not set. 
-d669228 Use posix_memalign() for memory used to write bitmaps -178950e FIX: Changes in '0' case for reshape position verification -9200d41 avoid double-free upon "old buggy kernel" sysfs_read failure -4011421 Print error message if failing to write super for 1.x metadata -0011874 Use MDMON_DIR for pid files created in Monitor.c -56d1885 Assemble: don't use O_EXCL until we have checked device content. -b720636 Assemble: support assembling of a RAID0 being reshaped. -c69ffac Manage: allow --re-add to failed array. -52f07f5 Reset bad flag on map update -911cead super1: support superblocks up to 4K. -ad6db3c Create: reduce the verbosity of 'default_layout'. -b2bfdfa super1.c don't keep recalculating bitmap pointer -4122675 Define and use SUPER1_SIZE for allocations -1afa930 init_super1() memset full buffer allocated for superblock -2de0b8a match_metadata_desc1(): Use calloc instead of malloc+memset -3c0bcd4 Use 4K buffer alignment for superblock allocations -308340a Use struct align_fd to cache fd's block size for aligned reads/writes -65ed615 match_metadata_desc0(): Use calloc instead of malloc+memset -de89706 Generalize ROUND_UP() macro and introduce matching ROUND_UP_PTR() -0a2f189 super1.c: use ROUND_UP/ROUND_UP_PTR -654a381 super-intel.c: Use ROUND_UP() instead of manually coding it -42d5dfd __write_init_super_ddf(): Use posix_memalign() instead of static aligned buffer -d4633e0 Examine: fix array size calculation for RAID10. -e62b778 Assemble: improve verbose logging when including old devices. -0073a6e Remove possible crash during RAID6 -> RAID5 reshape. -69fe207 Incremental: fix adding devices with --incremental -bcbb311 Manage: replace 'return 1' with 'goto abort'. -9f58469 Manage: freeze recovery while adding multiple devices. -ae6c05a Create: round off size for RAID1 arrays. -5ca3a90 Grow: print useful error when converting RAID1->RAID5 will fail. -c07d640 Fix tests/05r1-re-add-nosupper -2d762ad Fix the new ROUND_UP macro. 
-fd324b0 sysfs: fixed sysfs_freeze_array array to work properly with Manage_subdevs. -5551b11 imsm: avoid overflows for disks over 1TB -97f81ee clear hi bits if not used after loading metadata from disk -e03640b simplify calculating array_blocks -29cd082 show 2TB volumes/disks support in --detail-platform -2cc699a check volume size in validate_geometry_imsm_orom -9126b9a check that no disk over 2TB is used to create container when no support -027c374 imsm: set 2tb disk attribute for spare -3556c2f Fix typo: wan -> want -15632a9 parse_size: distinguish between 0 and error. -fbdef49 Bitmap_offset is a signed number -508a7f1 super1: leave more space in front of data by default. -40110b9 Fix two typos in fprintf messages -342460c mdadm man page: fix typo -0e7f69a imsm: display maximum volumes per controller and array -36fd8cc imsm: FIX: Update function imsm_num_data_members() for Raid1/10 -7abc987 imsm: FIX: Add volume size expand support to imsm_analyze_change() -f3871fd imsm: Add new metadata update for volume size expansion -54397ed imsm: Execute size change for external metatdata -016e00f FIX: Support metadata changes rollback -fbf3d20 imsm: FIX: Support metadata changes rollback -44f6f18 FIX: Extend size of raid0 array -7e7e9a4 FIX: Respect metadata size limitations -65a9798 FIX: Detect error and rollback metadata -13bcac9 imsm: Add function imsm_get_free_size() -b130333 imsm: Support setting max size for size change operation -c41e00b imsm: FIX: Component size alignment check -58d26a2 FIX: Size change is possible as standalone change only -4aecb54 FIX: Assembled second array is in read only state during reshape -ae2416e FIX: resolve make everything compilation error -480f356 Raid limit of 1024 when scanning for devices. -c2ecf5f Add --prefer option for --detail and --monitor -0a99975 Relax restrictions on when --add is permitted. 
-7ce0570 imsm: fix: rebuild does not continue after reboot -b51702b fix: correct extending size of raid0 array -34a1395 Fix sign extension of bitmap_offset in super1.c -012a864 Introduce sysfs_set_num_signed() and use it to set bitmap/offset -5d7b407 imsm: fix: thunderdome may drop 2tb attribute -5ffdc2d Update test for "is udev active". -96fd06e Adjust to new standard of /run -974e039 test: don't worry too much about array size. -b0a658f Grow: failing the set the per-device size is not an error. -36614e9 super-intel.c: Don't try to close negative fd -562aa10 super-intel.c: Fix resource leak from opendir() - diff --git a/ANNOUNCE-3.2.5 b/ANNOUNCE-3.2.5 deleted file mode 100644 index 396da12..0000000 --- a/ANNOUNCE-3.2.5 +++ /dev/null @@ -1,31 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.2.5 - A tool for managing Soft RAID under Linux - -I am somewhat disappointed to have to announce the availability of - mdadm version 3.2.5 - -It is available at the usual places, now including github: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://github.com/neilbrown/mdadm - git://neil.brown.name/mdadm - http://neil.brown.name/git/mdadm - -This release primarily fixes a serious regression in 3.2.4. -This regression does *not* cause any risk to data. It simply -means that adding a device with "--add" would sometime fail -when it should not. - -The fix also includes a couple of minor fixes such as making -the "--layout=preserve" option to "--grow" work again. - -A reminder that the default location for runtime files is now -"/run/mdadm". If you compile this for a distro that does not -have "/run", you will need to compile with an alternate setting for -MAP_DIR. e.g. 
- make MAP_DIR=/var/run/mdadm -or - make MAP_DIR=/dev/.mdadm - -NeilBrown 18th May 2012 - diff --git a/ANNOUNCE-3.2.6 b/ANNOUNCE-3.2.6 deleted file mode 100644 index f5cfd49..0000000 --- a/ANNOUNCE-3.2.6 +++ /dev/null @@ -1,57 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.2.6 - A tool for managing Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.2.6 - -It is available at the usual places, now including github: - countrycode=xx. - http://www.${countrycode}kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://github.com/neilbrown/mdadm - git://neil.brown.name/mdadm - http://neil.brown.name/git/mdadm - -This is a stablity release which adds a number of bugfixs to 3.2.5. -There are no real stand-out fixes, just lots of little bits and pieces. - -Below is the "git log --oneline --reverse" list of changes since -3.2.5. - -NeilBrown 25th October 2012 - -b7e05d2 udev-rules: prevent systemd from mount devices before they are ready. -0d478e2 mdadm: Fix Segmentation fault. -42f0ca1 imsm: fix: correct checking volume's degradation -fcf2195 Monitor: fix inconsistencies in values for ->percent -5f862fb Monitor: Report NewArray when an array the disappeared, reappears. -6f51b1c Monitor: fix reporting for Fail vs FailSpare etc. -68ad53b mdmon: fix arg parsing. -517f135 Assemble: don't leak memory with fdlist. -090900c udev-rules: prevent systemd from mount devices before they are ready. -446e000 sha1.h: remove ansidecl.h header inclusion -ec894f5 Manage: zero metadata before adding to 'external' array. -3a84db5 ddf: allow a non-spare to be used to recovery a missing device. -c5d61ca ddf: hack to fix container recognition. -23084aa mdmon: fix arg processing for -a -c4e96a3 mdmon: allow --takeover when original was started with --offroot -80841df find_free_devnum: avoid auto-using names in /etc/mdadm.conf -c5c56d6 mapfile: fix mapfile rebuild for containers -aec89f6 fix segfaults in Detail() -2117ad1 Fix 'enough' function for RAID10. 
-0bc300d Use --offroot flag when assembling md arrays via --incrmental -ac78f24 Grow: make warning about old metadata more explicit. -14026ab Replace sha1.h with slightly older version. -6f6809f Add zlib license to crc32.c -5267ba0 Handles spaces in array names better. -c51f288 imsm: allow --assume-clean to work. -acf7076 Grow: allow --grow --continue to work for native metadata. -335d2a6 Grow: fix a couple of typos with --assume-clean usage -9ff1427 Fix open_container -3713633 mdadm: super0: do not override uuid with homehost -31bff58 Trivial bugfix and spelling fixes. -e1e539f Detail: don't report a faulty device as 'spare' or 'rebuilding'. -22a6461 super0: allow creation of array on 2TB+ devices. -a5d47a2 Create new md devices consistently -eb48676 Monitor: don't complain about non-monitorable arrays in mdadm.conf -ecdf2d7 Query: don't be confused by partition tables. -f7b75c1 Query: allow member of non-0.90 arrays to be better reported. diff --git a/ANNOUNCE-3.3 b/ANNOUNCE-3.3 deleted file mode 100644 index f770aa1..0000000 --- a/ANNOUNCE-3.3 +++ /dev/null @@ -1,63 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.3 - A tools for managing md Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.3 - -It is available at the usual places: - http://www.kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://github.com/neilbrown/mdadm - git://neil.brown.name/mdadm - http://git.neil.brown.name/git/mdadm - -This is a major new release so don't be too surprised if there are a -few issues. If I hear about them they will be fixed in 3.3.1. -git log reports nearly 500 changes since 3.2.6 so I won't list them -all. - -Some highlights are: - -- Some array reshapes can proceed without needing backup file. - This is done by changing the 'data_offset' so we never need to write - any data back over where it was before. 
If there is no "head space" - or "tail space" to allow data_offset to change, the old mechanism - with a backup file can still be used. -- RAID10 arrays can be reshaped to change the number of devices, - change the chunk size, or change the layout between 'near' - and 'offset'. - This will always change data_offset, and will fail if there is no - room for data_offset to be moved. -- "--assemble --update=metadata" can convert a 0.90 array to a 1.0 array. -- bad-block-logs are supported (but not heavily tested yet) -- "--assemble --update=revert-reshape" can be used to undo a reshape - that has just been started but isn't really wanted. This is very - new and while it passes basic tests it cannot be guaranteed. -- improved locking between --incremental and --assemble -- uses systemd to run "mdmon" if systemd is configured to do that. -- kernel names of md devices can be non-numeric. e.g. "md_home" rather than - "md0". This will probably confuse lots of other tools, so you need to - echo CREATE names=yes >> /etc/mdadm.conf - or the feature will not be used. (you also need a reasonably new kernel). -- "--stop" can be given a kernel name instead of a device name. i.e - mdadm --stop md4 - will work even if /dev/md4 doesn't exist. -- "--detail --export" has some information about the devices in the array -- --dump and --restore can be used to backup and restore the metadata on an - array. -- Hot-replace is supported with - mdadm /dev/mdX --replace /dev/foo - and - mdadm /dev/mdX --replace /dev/foo --with /dev/bar -- Config file can be a directory in which case all "*.conf" files are - read in lexical order. - Default is to read /etc/mdadm.conf and then /etc/mdadm.conf.d - Thus - echo CREATE name=yes > /etc/mdadm.conf.d/names.conf - will also enable the use of named md devices. - -- Lots of improvements to DDF support including adding support for - RAID10 (thanks Martin Wilck). - -and lots of bugfixes and other little changes. 
- -NeilBrown 3rd September 2013 diff --git a/ANNOUNCE-3.3.1 b/ANNOUNCE-3.3.1 deleted file mode 100644 index 7d5e666..0000000 --- a/ANNOUNCE-3.3.1 +++ /dev/null @@ -1,23 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.3.1 - A tool for managing md Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.3.1 - -It is available at the usual places: - http://www.kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://github.com/neilbrown/mdadm - git://neil.brown.name/mdadm - http://git.neil.brown.name/git/mdadm.git - -The main changes are: - - lots of work on "DDF" support. Hopefully it will be more stable - now. Bug reports are always welcome. - - improved interactions with 'systemd'. Where possible, background - tasks are run from systemd (if it is present) rather then forking - disassociationg from the session. This is important because udev - doesn't really let you disassociate. - -though there are a number of other little bug fixes too. - -NeilBrown 5th June 2014 diff --git a/ANNOUNCE-3.3.2 b/ANNOUNCE-3.3.2 deleted file mode 100644 index 6b54961..0000000 --- a/ANNOUNCE-3.3.2 +++ /dev/null @@ -1,16 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.3.2 - A tool for managing md Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.3.2 - -It is available at the usual places: - http://www.kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://github.com/neilbrown/mdadm - git://neil.brown.name/mdadm - http://git.neil.brown.name/git/mdadm.git - -Changes since 3.3.1 are mostly little bugfixes and some man-page -updates. 
- -NeilBrown 21st August 2014 diff --git a/ANNOUNCE-3.3.3 b/ANNOUNCE-3.3.3 deleted file mode 100644 index ac1b217..0000000 --- a/ANNOUNCE-3.3.3 +++ /dev/null @@ -1,18 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.3.3 - A tool for managing md Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.3.3 - -It is available at the usual places: - http://www.kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://github.com/neilbrown/mdadm - git://neil.brown.name/mdadm - http://git.neil.brown.name/git/mdadm.git - -The 100 changes since 3.3.3 are mostly little bugfixes and some improvements -to the selftests. -raid6check now handle all RAID6 layouts including DDF correctly. -See git log for the rest. - -NeilBrown 24th July 2015 diff --git a/ANNOUNCE-3.3.4 b/ANNOUNCE-3.3.4 deleted file mode 100644 index 52b9456..0000000 --- a/ANNOUNCE-3.3.4 +++ /dev/null @@ -1,37 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.3.4 - A tool for managing md Soft RAID under Linux - -I am somewhat disappointed to have to announce the availability of - mdadm version 3.3.4 - -It is available at the usual places: - http://www.kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://github.com/neilbrown/mdadm - git://neil.brown.name/mdadm - http://git.neil.brown.name/git/mdadm.git - -In mdadm-3.3 a change was made to how IMSM (Intel Matrix Storage -Manager) metadata was handled. Previously an IMSM array would only -be assembled if it was attached to an IMSM controller. - -In 3.3 this was relaxed as there are circumstances where the -controller is not properly detected. Unfortunately this has negative -consequences which have only just come to light. - -If you have an IMSM RAID1 configured and then disable RAID in the -BIOS, the metadata will remain on the devices. 
If you then install -some other OS on one device and then install Linux on the other, Linux -might eventually start noticing the IMSM metadata (depending a bit on whether -mdadm is included in the initramfs) and might start up the RAID1. This could -copy one device over the other, thus trashing one of the installations. - -Not good. - -So with this release IMSM arrays will only be assembled if attached to -an IMSM controller, or if "--force" is given to --assemble, or if the -environment variable IMSM_NO_PLATFORM is set (used primarily for -testing). - -I strongly recommend upgrading to 3.3.4 if you are using 3.3 or later. - -NeilBrown 3rd August 2015. diff --git a/ANNOUNCE-3.4 b/ANNOUNCE-3.4 deleted file mode 100644 index 2689732..0000000 --- a/ANNOUNCE-3.4 +++ /dev/null @@ -1,24 +0,0 @@ -Subject: ANNOUNCE: mdadm 3.4 - A tool for managing md Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 3.4 - -It is available at the usual places: - http://www.kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://github.com/neilbrown/mdadm - git://neil.brown.name/mdadm - http://git.neil.brown.name/git/mdadm - -The new second-level version number reflects significant new -functionality, particular support for journalled RAID5/6 and clustered -RAID1. This new support is probably still buggy. Please report bugs. - -There are also a number of fixes for Intel's IMSM metadata support, -and an assortment of minor bug fixes. - -I plan for this to be the last release of mdadm that I provide as I am -retiring from MD and mdadm maintenance. Jes Sorensen has volunteered -to oversee mdadm for the next while. Thanks Jes! 
- -NeilBrown 28th January 2016 diff --git a/ANNOUNCE-4.0 b/ANNOUNCE-4.0 deleted file mode 100644 index f79c540..0000000 --- a/ANNOUNCE-4.0 +++ /dev/null @@ -1,22 +0,0 @@ -Subject: ANNOUNCE: mdadm 4.0 - A tool for managing md Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 4.0 - -It is available at the usual places: - http://www.kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://git.kernel.org/pub/scm/utils/mdadm/mdadm.git - http://git.kernel.org/cgit/utils/mdadm/ - -The update in major version number primarily indicates this is a -release by it's new maintainer. In addition it contains a large number -of fixes in particular for IMSM RAID and clustered RAID support. In -addition this release includes support for IMSM 4k sector drives, -failfast and better documentation for journaled RAID. - -This is my first release of mdadm. Please thank Neil Brown for his -previous work as maintainer and blame me for all the bugs I caused -since taking over. - -Jes Sorensen, 2017-01-09 diff --git a/ANNOUNCE-4.1 b/ANNOUNCE-4.1 deleted file mode 100644 index a273b9a..0000000 --- a/ANNOUNCE-4.1 +++ /dev/null @@ -1,16 +0,0 @@ -Subject: ANNOUNCE: mdadm 4.1 - A tool for managing md Soft RAID under Linux - -I am pleased to announce the availability of - mdadm version 4.1 - -It is available at the usual places: - http://www.kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://git.kernel.org/pub/scm/utils/mdadm/mdadm.git - http://git.kernel.org/cgit/utils/mdadm/ - -The update constitutes more than one year of enhancements and bug fixes -including for IMSM RAID, Partial Parity Log, clustered RAID support, -improved testing, and gcc-8 support. 
- -Jes Sorensen, 2018-10-01 diff --git a/ANNOUNCE-4.2 b/ANNOUNCE-4.2 deleted file mode 100644 index 8b22d09..0000000 --- a/ANNOUNCE-4.2 +++ /dev/null @@ -1,19 +0,0 @@ -Subject: ANNOUNCE: mdadm 4.2 - A tool for managing md Soft RAID under Linux - -I am pleased to finally announce the availability of mdadm-4.2. -get 4.2 out the door soon. - -It is available at the usual places: - http://www.kernel.org/pub/linux/utils/raid/mdadm/ -and via git at - git://git.kernel.org/pub/scm/utils/mdadm/mdadm.git - http://git.kernel.org/cgit/utils/mdadm/ - -The release includes more than two years of development and bugfixes, -so it is difficult to remember everything. Highlights include -enhancements and bug fixes including for IMSM RAID, Partial Parity -Log, clustered RAID support, improved testing, and gcc-9 support. - -Thank you everyone who contributed to this release! - -Jes Sorensen, 2021-12-30 diff --git a/Assemble.c b/Assemble.c index 9d04205..f6c5b99 100644 --- a/Assemble.c +++ b/Assemble.c @@ -1988,7 +1988,7 @@ int assemble_container_content(struct supertype *st, int mdfd, * and ignoring special character on the first place. 
*/ if (strcmp(sra->text_version + 1, content->text_version + 1) != 0) { - if (sysfs_set_array(content, 9003) != 0) { + if (sysfs_set_array(content) != 0) { sysfs_free(sra); return 1; } diff --git a/Build.c b/Build.c index 1fbf859..1be90e4 100644 --- a/Build.c +++ b/Build.c @@ -156,12 +156,6 @@ int Build(struct mddev_ident *ident, struct mddev_dev *devlist, struct shape *s, bitmap_fd = open(s->bitmap_file, O_RDWR); if (bitmap_fd < 0) { int major = BITMAP_MAJOR_HI; -#if 0 - if (s->bitmap_chunk == UnSet) { - pr_err("%s cannot be opened.\n", s->bitmap_file); - goto abort; - } -#endif bitmapsize = s->size >> 9; /* FIXME wrong for RAID10 */ if (CreateBitmap(s->bitmap_file, 1, NULL, s->bitmap_chunk, c->delay, diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..c1997ba --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,368 @@ +# Release [mdadm-4.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-4.3) + +Features: +- **IMSM_NO_PLATFORM** boot parameter support from Neil Brown. +- **--write-zeros** option support by Logan Gunthorpe. +- **IMSM** monetization by VMD register from Mateusz Grzonka. +- RST SATA under VMD support from Kevin Friedberg. +- Strong name rules from Mariusz Tkaczyk. + +Fixes: +- Unify failed raid behavior from Coly Li. +- Rework of **--update** options from Mateusz Kusiak. +- **mdmon-initrd** service from Neil Brown. +- **IMSM** expand functionality rework from Mariusz Tkaczyk. +- Mdmonitor improvements from Mateusz Grzonka. +- Failed state verification from Mateusz Kusiak and Kinga Tanska. + +# Release [mdadm-4.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-4.2) + +The release includes more than two years of development and bugfixes, so it is difficult to +remember everything. Highlights include enhancements and bug fixes including for **IMSM** RAID, +Partial Parity Log, clustered RAID support, improved testing, and gcc-9 support. 
+ +# Release [mdadm-4.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-4.1) + +The update constitutes more than one year of enhancements and bug fixes including for **IMSM** +RAID, Partial Parity Log, clustered RAID support, improved testing, and gcc-8 support. + +# Release [mdadm-4.0](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-4.0) + +The update in major version number primarily indicates this is a release by its new maintainer. +In addition it contains a large number of fixes in particular for IMSM RAID and clustered RAID +support. In addition, this release includes support for IMSM 4k sector drives, failfast and better +documentation for journaled RAID. + +This is my first release of mdadm. Please thank Neil Brown for his previous work as maintainer and +blame me for all the bugs I caused since taking over. + +# Release [mdadm-3.4](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.4) + +- Support for journalled RAID5/6 and clustered RAID1. This new support is probably still buggy. + Please report bugs. + +- There are also a number of fixes for **IMSM** support and an assortment of minor bug fixes. + +- I plan for this to be the last release of mdadm that I provide as I am retiring from MD and mdadm + maintenance. Jes Sorensen has volunteered to oversee mdadm for the next while. Thanks Jes! + +# Release [mdadm-3.3.4](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.3.4) + +**I strongly recommend upgrading to 3.3.4 if you are using 3.3 or later with IMSM.** + +- **IMSM** metadata assemble fixes. + + In mdadm-3.3 a change was made to how **IMSM** metadata was handled. Previously an **IMSM** array + would only be assembled if it was attached to an **IMSM** controller. In 3.3 this was relaxed as + there are circumstances where the controller is not properly detected. Unfortunately, this has + negative consequences which have only just come to light.
+ + If you have an IMSM RAID1 configured and then disable RAID in the BIOS, the metadata will remain + on the devices. If you then install some other OS on one device and then install Linux on the + other, Linux might eventually start noticing the IMSM metadata (depending a bit on whether + mdadm is included in the initramfs) and might start up the RAID1. This could copy one device over + the other, thus trashing one of the installations. + + So, with this release IMSM arrays will only be assembled if attached to an **IMSM** controller, + or if **--force** is given to **--assemble**, or if the environment variable + **IMSM_NO_PLATFORM=1** is set (used primarily for testing). + +# Release [mdadm-3.3.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.3.3) + +- The 100 changes since 3.3.2 are mostly little bugfixes and some improvements to the self-tests. +- raid6check now handles all RAID6 layouts including **DDF** correctly. See git log for the rest. + +# Release [mdadm-3.3.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.3.2) + +- Little bugfixes and some man-page updates. + +# Release [mdadm-3.3.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.3.1) + +- lots of work on **DDF** support. +- Improved interactions with **systemd**. Where possible, background tasks are run from systemd + rather than forking. +- Number of other little bug fixes too. + +# Release [mdadm-3.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.3) + +- Some array reshapes can proceed without needing backup file. This is done by changing the + *data_offset* so we never need to write any data back over where it was before. If there is no + 'head space' or 'tail space' to allow *data_offset* to change, the old mechanism with a backup + file can still be used. + +- RAID10 arrays can be reshaped to change the number of devices, change the chunk size, or change + the layout between *near* and *offset*.
+ This will always change *data_offset*, and will fail if there is no room for *data_offset* to be + moved. + +- **--assemble --update=metadata** can convert a **0.90** array to a **1.0** array. + +- **bad-block-logs** are supported (but not heavily tested yet). + +- **--assemble --update=revert-reshape** can be used to undo a reshape that has just been started + but isn't really wanted. This is very new and while it passes basic tests it cannot be + guaranteed. + +- improved locking between **--incremental** and **--assemble**. + +- uses systemd to run **mdmon** if systemd is configured to do that. +- kernel names of md devices can be non-numeric. e.g. "md_home" rather than + "md0". This will probably confuse lots of other tools, so you need to + **echo CREATE names=yes >> /etc/mdadm.conf** or the feature will not be used (you also need a + reasonably new kernel). + +- **--stop** can be given a kernel name instead of a device name. i.e. **mdadm --stop md4** will + work even if /dev/md4 doesn't exist. + +- **--detail --export** has some information about the devices in the array. +- **--dump** and **--restore** can be used to backup and restore the metadata on an array. +- Hot-replace is supported with **mdadm /dev/mdX --replace /dev/foo** and + **mdadm /dev/mdX --replace /dev/foo --with /dev/bar**. + +- Config file can be a directory in which case all "*.conf" files are read in lexical order. + Default is to read **/etc/mdadm.conf** and then **/etc/mdadm.conf.d**. Thus + **echo CREATE names=yes > /etc/mdadm.conf.d/names.conf** will also enable the use of named md + devices. + +- Lots of improvements to **DDF** support including adding support for RAID10 (thanks Martin Wilck). + +# Release [mdadm-3.2.6](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.6) + +- There are no real stand-out fixes, just lots of little bits and pieces.
+ +# Release [mdadm-3.2.5](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.5) + +- This release primarily fixes a serious regression in 3.2.4. This regression does *not* cause + any risk to data. It simply means that adding a device with **--add** would sometimes fail + when it should not. +- The fix also includes a couple of minor fixes such as making the **--layout=preserve** option to + **--grow** work again. + +# Release [mdadm-3.2.4](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.4) + + - **--offroot** argument to improve interactions between mdmon and initrd. + - **--prefer** argument to select which */dev* names to display in some circumstances. + - relax restrictions on when **--add** will be allowed. + - Fix bug with adding write-intent-bitmap to active array. + - Now defaults to */run/mdadm* for storing run-time files. + +# Release [mdadm-3.2.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.3) + +- The largest single area of change is support for reshape of Intel IMSM arrays (OnLine Capacity + Expansion and Level Migration). +- Among other fixes, this now has a better chance of surviving if a device fails during reshape. + +# Release [mdadm-3.2.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.2) + +- reshaping IMSM (Intel metadata) arrays is no longer 'experimental', it should work properly and be + largely compatible with IMSM drivers in other platforms. +- **--assume-clean** can be used with **--grow --size** to avoid resyncing the new part of the + array. This is only supported with very new kernels. +- RAID0 arrays can have chunksize which is not a power of 2. This has been supported in the kernel + for a while but is only now supported by mdadm. + +- A new tool **raid6check** is available, which can check a RAID6 array, or part of it and report + which device is most inconsistent with the others if any stripe is inconsistent.
This is still + under development and does not have a man page yet. If anyone tries it out and has any questions + or experience to report, they would be most welcome on linux-raid@vger.kernel.org. + +# Release [mdadm-3.2.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2.1) + +- Policy framework + + Policy can be expressed for moving spare devices between arrays, and for how to handle hot-plugged + devices. This policy can be different for devices plugged in to different controllers etc. This, + for example, allows a configuration where when a device is plugged in it is immediately included + in an md array as a hot spare and possibly starts recovery immediately if an array is degraded. + +- Some understanding of mbr and gpt partition tables. This is primarily to support the new + hot-plug support. If a device is plugged in and policy suggests it should have a partition table, + the partition table will be copied from a suitably similar device, and then the partitions will + hot-plug and can then be added to md arrays. + +- **--incremental --remove** can remember where a device was removed from so if a device gets + plugged back in the same place, special policy applies to it, allowing it to be included in an + array even if a general hotplug will not be included. + +- Enhanced reshape options, including growing a RAID0 by converting to RAID4, restriping, and + converting back. Also conversions between RAID0 and RAID10 and between RAID1 and RAID10 are + possible (with a suitably recent kernel). + +- Spare migration for IMSM arrays. Spare migration can now work across 'containers' using + non-native metadata and specifically Intel's IMSM arrays support spare migrations. + +- OLCE and level migration for Intel IMSM arrays. OnLine Capacity Expansion and level migration + (e.g. RAID0 -> RAID5) is supported for Intel Matrix Storage Manager arrays. This support is + currently *experimental* for technical reasons.
It can be enabled with + **export MDADM_EXPERIMENTAL=1**. + +- avoid including wayward devices. + + If you split a RAID1, mount the two halves as two separate degraded RAID1s, and then later bring + the two back together, it is possible that the md metadata won't properly show that one must + over-ride the other. Mdadm now does extra checking to detect this possibility and avoid + potentially corrupting data. + +- Remove any possible confusion between similar options. e.g. **--brief** and **--bitmap** were + mapped to 'b' and mdadm wouldn't notice if one was used where the other was expected. + +- Allow K,M,G suffixes on chunk sizes. + +# Release [mdadm-3.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.2) + +- By far the most significant change in this release related to the management of reshaping arrays. + This code has been substantially re-written so that it can work with **externally managed + metadata** - Intel's IMSM in particular. We now support level migration and OnLine Capacity + Expansion on these arrays. + +- Various policy statements can be made in the *mdadm.conf* to guide the behavior of mdadm, + particularly with regard to how new devices are treated by **--incremental**. Depending on the + *action* associated with a device (identified by its *path*) such new devices can be + automatically re-added to an existing array that they previously fell out of, or automatically + added as a spare if they appear to contain no data. + +- mdadm now has a limited understanding of partition tables. This allows the policy framework to + make decisions about partitioned devices as well. + +- **--incremental --remove** can be told what **--path** the device was on, and this info will be + recorded so that another device appearing at the same physical location can be preferentially + added to the same array (provided the spare-same-slot action policy applied to the path).
+ +- A new **--invalid-backup** flag is available in **--assemble** mode. This can be used to + re-assemble an array which was stopped in the middle of a reshape, and for which the + *backup file* is no longer available or is corrupted. The array may have some corruption in it + at the point where reshape was up to, but at least the rest of the array will become available. + +- Policy framework. +- Various internal restructuring - more is needed. + +# Release [mdadm-3.1.5](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1.5) + +- Fixes for **v1.x** metadata on big-endian machines. +- man page improvements. +- Improve **--detail --export** when run on partitions of an md array. +- Fix regression with removing *failed* or *detached* devices. +- Fixes for **--assemble --force** in various unusual cases. +- Allow **-Y** to mean **--export**. This was documented but not implemented. +- Various fixes for handling **ddf** metadata. This is now more reliable but could benefit from + more interoperability testing. +- Correctly list subarrays of a container in **--detail** output. +- Improve checks on whether the requested number of devices is supported by the metadata, both for + **--create** and **--grow**. +- Don't remove partitions from a device that is being included in an array until we are fully + committed to including it. +- Allow **--assemble --update=no-bitmap** so an array with a corrupt bitmap can still be assembled. +- Don't allow **--add** to succeed if it looks like a **--re-add** is probably wanted, but cannot + succeed. This avoids inadvertently turning devices into spares when an array is failed. + +# Release [mdadm-3.1.4](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1.4) + +Two fixes related to configs that aren't using udev: +- Don't remove md devices with 'standard' names on **--stop**. +- Allow dev_open to work on read-only */dev*. 
+ +And fixed regressions: +- Allow **--incremental** to add spares to an array. +- Accept **--no-degraded** as a deprecated option rather than throwing an error. +- Return correct success status when **--incremental** assembling a container which does not yet + have enough devices. +- Don't link mdadm with pthreads, only mdmon needs it. +- Fix compiler warning due to bad use of snprintf. + +# Release [mdadm-3.1.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1.3) + +- mapfile now lives in a fixed location which defaults to */dev/.mdadm/map*, but can be changed at + compile time. This location is chosen as most distros provide it during early boot and preserve + it through. As long as */dev* exists and is writable, */dev/.mdadm* will be created. Other files + for communication with mdmon live here too. This fixes a bug reported by Debian and Gentoo users where + udev would spin in early-boot. + +- IMSM and DDF metadata will not be recognized on partitions as they should only be used on + whole-disks. + +- Various overflows caused by 2G drives have been addressed. + +- A subarray of an IMSM container can now be killed with **--kill-subarray**. Also, subarrays can be + renamed with **--update-subarray --update=name**. + +- **-If** (or **--incremental --fail**) can be used from udev to fail and remove from all arrays + a device which has been unplugged from the system i.e. hot-unplug-support. + +- **/dev/mdX --re-add missing** will look for any device that looks like it should be a member of + */dev/mdX* but isn't and will automatically **--re-add** it. + +- Now compile with *-Wextra* to get extra warnings. +- Lots of minor bug fixes, documentation improvements, etc. + +# Release [mdadm-3.1.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1.2) + +- The default metadata has changed again (sorry about that). It is now **v1.2** and will hopefully + stay that way. 
It turned out there were boot-block issues with **v1.1** which make it unsuitable + for a default, though in many cases it is still suitable to use. + +- Add *homehost* to the valid words for the **AUTO** config file line. When followed by *-all*, + this causes mdadm to auto-assemble any array belonging to this host, but not auto-assemble + anything else. + +- VAR_RUN can be easily changed at compile time just like ALT_RUN. This gives distros more + flexibility in how to manage the pid and sock files that mdmon needs. + +- If mdadm.conf lists arrays which have inter-dependencies, they previously had to be listed in the + "right" order. Now, any order should work. + +- Fix some bugs with **--grow --chunksize=**. +- Stopping a container is not permitted when members are still active. +- Various mdmon fixes. +- Always make bitmap 4K-aligned if at all possible. +- Fix **--force** assembly of **v1.x** arrays which are in the process of recovering. +- Add section on 'scrubbing' to 'md' man page. +- Various command-line-option parsing improvements. +- ... and lots of other bug fixes. + +# Release [mdadm-3.1.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1.1) + +- Multiple fixes for new **--grow** levels including fixes for serious data corruption + problems. +- Change default metadata to **v1.1**. +- Change default chunk size to 512K. +- Change default bitmap chunk size to 64MB. +- When **--re-add** is used, don't fall back to **--add** as this can destroy data. + +# Release [mdadm-3.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.1) + +- Support **--grow** to change the layout of RAID 4/5/6. +- Support **--grow** to change the chunk size of RAID 4/5/6. +- Support **--grow** to change level from RAID1 -> RAID5 -> RAID6 and back. +- Support **--grow** to reduce the number of devices in RAID 4/5/6. +- Support restart of these grow options when assembling an array which is partially grown. 
+- Assorted tests of this code, and of different RAID6 layouts. + +# Release [mdadm-3.0.3](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.0.3) + +- Improvements for creating arrays giving just a name, like *foo*, rather than the full + */dev/md/foo*. +- Improvements for assembling member arrays of containers. +- Improvements to test suite. +- Add option to change increment for *RebuildNN* messages reported by **--monitor**. +- Improvements to **mdmon** hand-over from initrd to final root. +- Handle merging of devices that have left an IMSM array and are being re-incorporated. +- Add missing space in **--detail --brief** output. + +# Release [mdadm-3.0.2](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.0.2) + +- Fix crash when **homehost** is not set, as often happens in early boot. + +# Release [mdadm-3.0.1](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.0.1) + +- Fix various segfaults. +- Fixed for **--examine** with containers. +- Lots of other little fixes. + +# Release [mdadm-3.0](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git/log/?h=mdadm-3.0) + +- Support for **externally managed metadata**, specifically DDF and IMSM. +- Depend on udev to create entries in */dev*, rather than creating them ourselves. +- Remove **--auto-update-home-hosts**. +- New config file line **auto**. +- New *ignore* and *any* options for **homehost**. +- Numerous bug fixes and minor enhancements. diff --git a/ChangeLog b/ChangeLog deleted file mode 100644 index a3bf700..0000000 --- a/ChangeLog +++ /dev/null @@ -1,306 +0,0 @@ -Please see git logs for detailed change log. -This file just contains highlight. - -Changes Prior to release 3.3 -- Some array reshapes can proceed without needing backup file. - This is done by changing the 'data_offset' so we never need to write - any data back over where it was before. 
If there is no "head space" - or "tail space" to allow data_offset to change, the old mechanism - with a backup file can still be used. -- RAID10 arrays can be reshaped to change the number of devices, - change the chunk size, or change the layout between 'near' - and 'offset'. - This will always change data_offset, and will fail if there is no - room for data_offset to be moved. -- "--assemble --update=metadata" can convert a 0.90 array to a 1.0 array. -- bad-block-logs are supported (but not heavily tested yet) -- "--assemble --update=revert-reshape" can be used to undo a reshape - that has just been started but isn't really wanted. This is very - new and while it passes basic tests it cannot be guaranteed. -- improved locking between --incremental and --assemble -- uses systemd to run "mdmon" if systemd is configured to do that. -- kernel names of md devices can be non-numeric. e.g. "md_home" rather than - "md0". This will probably confuse lots of other tools, so you need to - echo CREATE names=yes >> /etc/mdadm.conf - or the feature will not be used. (you also need a reasonably new kernel). -- "--stop" can be given a kernel name instead of a device name. i.e - mdadm --stop md4 - will work even if /dev/md4 doesn't exist. -- "--detail --export" has some information about the devices in the array -- --dump and --restore can be used to backup and restore the metadata on an - array. -- Hot-replace is supported with - mdadm /dev/mdX --replace /dev/foo - and - mdadm /dev/mdX --replace /dev/foo --with /dev/bar -- Config file can be a directory in which case all "*.conf" files are - read in lexical order. - Default is to read /etc/mdadm.conf and then /etc/mdadm.conf.d - Thus - echo CREATE name=yes > /etc/mdadm.conf.d/names.conf - will also enable the use of named md devices. - -- Lots of improvements to DDF support including adding support for - RAID10 (thanks Martin Wilck). 
- -Changes Prior to release 3.2.6 - - There are no real stand-out fixes, just lots of little bits and pieces. - -Changes Prior to release 3.2.5 - - This release primarily fixes a serious regression in 3.2.4. - This regression does *not* cause any risk to data. It simply - means that adding a device with "--add" would sometime fail - when it should not. - - - The fix also includes a couple of minor fixes such as making - the "--layout=preserve" option to "--grow" work again. - - -Changes Prior to release 3.2.4 -"--oneline" log of changes is below. Some notable ones are: - - - --offroot argument to improve interactions between mdmon and initrd - - --prefer argument to select which /dev names to display in some - circumstances. - - relax restructions on when "--add" will be allowed - - Fix bug with adding write-intent-bitmap to active array - - Now defaults to "/run/mdadm" for storing run-time files. - -Changes Prior to release 3.2.3 - - The largest single area of change is support for reshape of Intel - IMSM arrays (OnLine Capacity Explansion and Level Migration). - - Among other fixes, this now has a better chance of surviving if a - device fails during reshape. - -Changes Prior to release 3.2.2 - - reshaping IMSM (Intel metadata) arrays is no longer 'experimental', - it should work properly and be largely compatible with IMSM drivers in - other platforms. - - --assume-clean can be used with --grow --size to avoid resyncing the - new part of the array. This is only support with very new kernels. - - RAID0 arrays can have chunksize which is not a power of 2. This has been - supported in the kernel for a while but is only now supprted by - mdadm. - - - A new tool 'raid6check' is available which can check a RAID6 array, - or part of it, and report which device is most inconsistent with the - others if any stripe is inconsistent. This is still under development - and does not have a man page yet. 
If anyone tries it out and has any - questions or experience to report, they would be most welcome on - linux-raid@vger.kernel.org. - -Changes Prior to release 3.2.1 - - policy framework - Policy can be expressed for moving spare devices between arrays, and - for how to handle hot-plugged devices. This policy can be different - for devices plugged in to different controllers etc. - This, for example, allows a configuration where when a device is plugged - in it is immediately included in an md array as a hot spare and - possibly starts recovery immediately if an array is degraded. - - - some understanding of mbr and gpt paritition tables - This is primarly to support the new hot-plug support. If a - device is plugged in and policy suggests it should have a partition table, - the partition table will be copied from a suitably similar device, and - then the partitions will hot-plug and can then be added to md arrays. - - - "--incremental --remove" can remember where a device was removed from - so if a device gets plugged back in the same place, special policy applies - to it, allowing it to be included in an array even if a general hotplug - will not be included. - - - enhanced reshape options, including growing a RAID0 by converting to RAID4, - restriping, and converting back. Also convertions between RAID0 and - RAID10 and between RAID1 and RAID10 are possible (with a suitably recent - kernel). - - - spare migration for IMSM arrays. - Spare migration can now work across 'containers' using non-native metadata - and specifically Intel's IMSM arrays support spare migrations. - - - OLCE and level migration for Intel IMSM arrays. - OnLine Capacity Expansion and level migration (e.g. RAID0 -> RAID5) is - supported for Intel Matrix Storage Manager arrays. - This support is currently 'experimental' for technical reasons. 
It can - be enabled with "export MDADM_EXPERIMENTAL=1" - - - avoid including wayward devices - If you split a RAID1, mount the two halves as two separate degraded RAID1s, - and then later bring the two back together, it is possible that the md - metadata won't properly show that one must over-ride the other. - mdadm now does extra checking to detect this possibilty and avoid - potentially corrupting data. - - - remove any possible confusion between similar options. - e.g. --brief and --bitmap were mapped to 'b' and mdadm wouldn't - notice if one was used where the other was expected. - - - allow K,M,G suffixes on chunk sizes - -Changes Prior to release 3.2 - - By far the most significant change in this release related to the - management of reshaping arrays. This code has been substantially - re-written so that it can work with 'externally managed metadata' - - Intel's IMSM in particular. We now support level migration and - OnLine Capacity Expansion on these arrays. - - Policy framework. - Various policy statements can be made in the mdadm.conf to guide - the behaviour of mdadm, particular with regards to how new devices - are treated by "mdadm -I". - Depending on the 'action' associated with a device (identified by - its 'path') such need devices can be automatically re-added to and - existing array that they previously fell out off, or automatically - added as a spare if they appear to contain no data. - - - mdadm now has a limited understanding of partition tables. This - allows the policy framework to make decisions about partitioned - devices as well. - - - --incremental --remove can be told what --path the device was on, - and this info will be recorded so that another device appearing at - the same physical location can be preferentially added to the same - array (provides the spare-same-slot action policy applied to the - path). - - - A new flags "--invalid-backup" flag is available in --assemble - mode. 
This can be used to re-assemble an array which was stopping - in the middle of a reshape, and for which the 'backup file' is no - longer available or is corrupted. The array may have some - corruption in it at the point where reshape was up to, but at least - the rest of the array will become available. - - - - Various internal restructuring - more is needed. - -Changes Prior to release 3.1.5 - - Fixes for v1.x metadata on big-endian machines. - - man page improvements - - Improve '--detail --export' when run on partitions of an md array. - - Fix regression with removing 'failed' or 'detached' devices. - - Fixes for "--assemble --force" in various unusual cases. - - Allow '-Y' to mean --export. This was documented but not implemented. - - Various fixed for handling 'ddf' metadata. This is now more reliable - but could benefit from more interoperability testing. - - Correctly list subarrays of a container in "--detail" output. - - Improve checks on whether the requested number of devices is supported - by the metadata - both for --create and --grow. - - Don't remove partitions from a device that is being included in an - array until we are fully committed to including it. - - Allow "--assemble --update=no-bitmap" so an array with a corrupt - bitmap can still be assembled. - - Don't allow --add to succeed if it looks like a "--re-add" is probably - wanted, but cannot succeed. This avoids inadvertently turning - devices into spares when an array is failed. - -Changes Prior to release 3.1.4 - Two fixes related to configs that aren't using udev: - - Don't remove md devices which 'standard' names on --stop - - Allow dev_open to work on read-only /dev - And fixed regressions: - - Allow --incremental to add spares to an array - - Accept --no-degraded as a deprecated option rather than - throwing an error - - Return correct success status when --incrmental assembling - a container which does not yet have enough devices. 
- - Don't link mdadm with pthreads, only mdmon needs it. - - Fix compiler warning due to bad use of snprintf - -Changes Prior to release 3.1.3 - - mapfile now lives in a fixed location which default to - /dev/.mdadm/map but can be changed at compile time. This - location is choses and most distros provide it during early - boot and preserve it through. As long a /dev exists and is - writable, /dev/.mdadm will be created. - Other files file communication with mdmon live here too. - This fixes a bug reported by Debian and Gentoo users where - udev would spin in early-boot. - - IMSM and DDF metadata will not be recognised on partitions - as they should only be used on whole-disks. - - Various overflows causes by 2G drives have been addressed. - - A subarray of an IMSM contain can now be killed with - --kill-subarray. Also subarrays can be renamed with - --update-subarray - - -If (or --incremental --fail) can be used from udev to - fail and remove from all arrays a device which has been - unplugged from the system. i.e. hot-unplug-support. - - "mdadm /dev/mdX --re-add missing" will look for any device - that looks like it should be a member of /dev/mdX but isn't - and will automatically --re-add it - - Now compile with -Wextra to get extra warnings. - - Lots of minor bug fixes, documentation improvements, etcc - -Changes Prior to release 3.1.2 - - The default metadata has change again (sorry about that). - It is now v1.2 and will hopefully stay that way. It turned - out there with boot-block issues with v1.1 which make it - unsuitable for a default, though in many cases it is still - suitable to use. - - Stopping a container is not permitted when members are still - active - - Add 'homehost' to the valid words for the "AUTO" config file - line. When followed by "-all", this causes mdadm to - auto-assemble any array belonging to this host, but not - auto-assemble anything else. - - Fix some bugs with "--grow --chunksize=" for changing chunksize. 
- - VAR_RUN can be easily changed at compile time just like ALT_RUN. - This gives distros more flexability in how to manage the - pid and sock files that mdmon needs. - - Various mdmon fixes - - Alway make bitmap 4K-aligned if at all possible. - - If mdadm.conf lists arrays which have inter-dependencies, - the previously had to be listed in the "right" order. Now - any order should work. - - Fix --force assembly of v1.x arrays which are in the process - of recovering. - - Add section on 'scrubbing' to 'md' man page. - - Various command-line-option parsing improvements. - - ... and lots of other bug fixes. - -Changes Prior to release 3.1.1 - - Multiple fixes for new --grow levels including fixes for - serious data corruption problems. - - Change default metadata to v1.1 - - Change default chunk size to 512K - - Change default bitmap chunk size to 64Meg - - When --re-add is used, don't fall back to - --add if --re-add fails as this can destroy data. - -Changes Prior to release 3.1 - - Support --grow to change the layout of RAID4/5/6 - - Support --grow to change the chunksize of raid 4/5/6 - - Support --grow to change level from RAID1 -> RAID5 -> RAID6 and - back. - - Support --grow to reduce the number of devices in RAID4/5/6. - - Support restart of these grow options which assembling an array - which is partially grown. - - Assorted tests of this code, and of different RAID6 layouts. - -Changes Prior to release 3.0.3 - - Improvements for creating arrays giving just a name, like 'foo', - rather than the full '/dev/md/foo'. - - Improvements for assembling member arrays of containers. - - Improvements to test suite - - Add option to change increment for RebuildNN messages reported - by "mdadm --monitor" - - Improvements to mdmon 'hand-over' from initrd to final root. - - Handle merging of devices that have left an IMSM array and are - being re-incorporated. - - Add missing space in "--detail --brief" output. 
- -Changes Prior to release 3.0.2 - - Fix crash when hosthost is not set, as often happens in - early boot. - -Changes Prior to release 3.0.1 - - Fix various segfaults - - Fixed for --examine with containers - - Lots of other little fixes. - -Changes Prior to release 3.0 - - Support for externally managed metadata, specifically DDF and IMSM. - - Depend on udev to create entries in /dev, rather than creating them - ourselves. - - remove --auto-update-home-hosts - - new config file line "auto" - - new "" and "any" options for "homehost" - - numerous bug fixes and minor enhancements. diff --git a/Create.c b/Create.c index 8082f54..d94253b 100644 --- a/Create.c +++ b/Create.c @@ -32,6 +32,10 @@ #include #include +#ifndef FALLOC_FL_ZERO_RANGE +#define FALLOC_FL_ZERO_RANGE 16 +#endif + static int round_size_and_verify(unsigned long long *size, int chunk) { if (*size == 0) @@ -279,8 +283,10 @@ static int add_disk_to_super(int mdfd, struct shape *s, struct context *c, dv->devname); return 1; } - if (!fstat_is_blkdev(fd, dv->devname, &rdev)) + if (!fstat_is_blkdev(fd, dv->devname, &rdev)) { + close(fd); return 1; + } info->disk.major = major(rdev); info->disk.minor = minor(rdev); } @@ -289,6 +295,7 @@ static int add_disk_to_super(int mdfd, struct shape *s, struct context *c, if (st->ss->add_to_super(st, &info->disk, fd, dv->devname, dv->data_offset)) { ioctl(mdfd, STOP_ARRAY, NULL); + close(fd); return 1; } st->ss->getinfo_super(st, info, NULL); @@ -297,6 +304,7 @@ static int add_disk_to_super(int mdfd, struct shape *s, struct context *c, *zero_pid = write_zeroes_fork(fd, s, st, dv); if (*zero_pid <= 0) { ioctl(mdfd, STOP_ARRAY, NULL); + close(fd); return 1; } } @@ -493,6 +501,7 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs, */ int mdfd; unsigned long long minsize = 0, maxsize = 0; + dev_policy_t *custom_pols = NULL; char *mindisc = NULL; char *maxdisc = NULL; char *name = ident->name; @@ -584,6 +593,9 @@ int Create(struct supertype *st, struct 
mddev_ident *ident, int subdevs, first_missing = subdevs * 2; second_missing = subdevs * 2; insert_point = subdevs * 2; + + if (mddev_test_and_add_drive_policies(st, &custom_pols, fd, 1)) + exit(1); } } if (fd >= 0) @@ -735,7 +747,7 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs, close(dfd); exit(2); } - close(dfd); + info.array.working_disks++; if (dnum < s->raiddisks && dv->disposition != 'j') info.array.active_disks++; @@ -808,6 +820,11 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs, } } + if (drive_test_and_add_policies(st, &custom_pols, dfd, 1)) + exit(1); + + close(dfd); + if (dv->disposition == 'j') goto skip_size_check; /* skip write journal for size check */ @@ -882,6 +899,7 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs, close(fd); } } + if (missing_disks == dnum && !have_container) { pr_err("Subdevs can't be all missing\n"); return 1; @@ -1136,26 +1154,30 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs, goto abort_locked; } - if (did_default && c->verbose >= 0) { + if (did_default) { if (is_subarray(info.text_version)) { - char devnm[32]; - char *ep; + char devnm[MD_NAME_MAX]; struct mdinfo *mdi; - strncpy(devnm, info.text_version+1, 32); - devnm[31] = 0; - ep = strchr(devnm, '/'); - if (ep) - *ep = 0; + sysfs_get_container_devnm(&info, devnm); + + mdi = sysfs_read(-1, devnm, GET_VERSION | GET_DEVS); + if (!mdi) { + pr_err("Cannot open sysfs for container %s\n", devnm); + goto abort_locked; + } + + if (sysfs_test_and_add_drive_policies(st, &custom_pols, mdi, 1)) + goto abort_locked; - mdi = sysfs_read(-1, devnm, GET_VERSION); + if (c->verbose >= 0) + pr_info("Creating array inside %s container /dev/%s\n", + mdi->text_version, devnm); - pr_info("Creating array inside %s container %s\n", - mdi?mdi->text_version:"managed", devnm); sysfs_free(mdi); - } else - pr_info("Defaulting to version %s metadata\n", - info.text_version); + } else if 
(c->verbose >= 0) { + pr_info("Defaulting to version %s metadata\n", info.text_version); + } } map_update(&map, fd2devnm(mdfd), info.text_version, @@ -1325,6 +1347,8 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs, udev_unblock(); close(mdfd); sysfs_uevent(&info, "change"); + dev_policy_free(custom_pols); + return 0; abort: @@ -1336,5 +1360,7 @@ int Create(struct supertype *st, struct mddev_ident *ident, int subdevs, if (mdfd >= 0) close(mdfd); + + dev_policy_free(custom_pols); return 1; } diff --git a/Detail.c b/Detail.c index aaa3dd6..55a086d 100644 --- a/Detail.c +++ b/Detail.c @@ -49,6 +49,30 @@ static int add_device(const char *dev, char ***p_devices, return n_devices + 1; } +/** + * detail_fname_from_uuid() - generate uuid string with special super1 handling. + * @mp: map entry to parse. + * @buf: buf to write. + * + * Hack to workaround an issue with super1 superblocks. It swapuuid set in order for assembly + * to work, but can't have it set if we want this printout to match all the other uuid printouts + * in super1.c, so we force swapuuid to 1 to make our printout match the rest of super1. + * + * Always convert uuid if host is big endian. 
+ */ +char *detail_fname_from_uuid(struct map_ent *mp, char *buf) +{ +#if __BYTE_ORDER == BIG_ENDIAN + bool swap = true; +#else + bool swap = false; +#endif + if (strncmp(mp->metadata, "1.", 2) == 0) + swap = true; + + return __fname_from_uuid(mp->uuid, swap, buf, ':'); +} + int Detail(char *dev, struct context *c) { /* @@ -226,6 +250,9 @@ int Detail(char *dev, struct context *c) str = map_num(pers, array.level); if (c->export) { + char nbuf[64]; + struct map_ent *mp = NULL, *map = NULL; + if (array.raid_disks) { if (str) printf("MD_LEVEL=%s\n", str); @@ -247,32 +274,22 @@ int Detail(char *dev, struct context *c) array.minor_version); } - if (st && st->sb && info) { - char nbuf[64]; - struct map_ent *mp, *map = NULL; - - fname_from_uuid(st, info, nbuf, ':'); - printf("MD_UUID=%s\n", nbuf + 5); + if (info) mp = map_by_uuid(&map, info->uuid); + if (!mp) + mp = map_by_devnm(&map, fd2devnm(fd)); - if (mp && mp->path && strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0) + if (mp) { + detail_fname_from_uuid(mp, nbuf); + printf("MD_UUID=%s\n", nbuf + 5); + if (mp->path && strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0) printf("MD_DEVNAME=%s\n", mp->path + DEV_MD_DIR_LEN); + } + map_free(map); + if (st && st->sb) { if (st->ss->export_detail_super) st->ss->export_detail_super(st); - map_free(map); - } else { - struct map_ent *mp, *map = NULL; - char nbuf[64]; - mp = map_by_devnm(&map, fd2devnm(fd)); - if (mp) { - __fname_from_uuid(mp->uuid, 0, nbuf, ':'); - printf("MD_UUID=%s\n", nbuf+5); - } - if (mp && mp->path && strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0) - printf("MD_DEVNAME=%s\n", mp->path + DEV_MD_DIR_LEN); - - map_free(map); } if (!c->no_devices && sra) { struct mdinfo *mdi; diff --git a/Grow.c b/Grow.c index f95dae8..074f199 100644 --- a/Grow.c +++ b/Grow.c @@ -2085,9 +2085,10 @@ int Grow_reshape(char *devname, int fd, if (!mdmon_running(st->container_devnm)) start_mdmon(st->container_devnm); ping_monitor(container); - if 
(mdmon_running(st->container_devnm) && - st->update_tail == NULL) - st->update_tail = &st->updates; + if (mdmon_running(st->container_devnm) == false) { + pr_err("No mdmon found. Grow cannot continue.\n"); + goto release; + } } if (s->size == MAX_SIZE) @@ -2097,11 +2098,7 @@ int Grow_reshape(char *devname, int fd, /* got truncated to 32bit, write to * component_size instead */ - if (sra) - rv = sysfs_set_num(sra, NULL, - "component_size", s->size); - else - rv = -1; + rv = sysfs_set_num(sra, NULL, "component_size", s->size); } else { rv = md_set_array_info(fd, &array); @@ -3048,6 +3045,8 @@ static int reshape_array(char *container, int fd, char *devname, dprintf("Cannot get array information.\n"); goto release; } + if (st->update_tail == NULL) + st->update_tail = &st->updates; if (array.level == 0 && info->component_size == 0) { get_dev_size(fd, NULL, &array_size); info->component_size = array_size / array.raid_disks; @@ -4414,19 +4413,8 @@ static void validate(int afd, int bfd, unsigned long long offset) lseek64(afd, __le64_to_cpu(bsb2.arraystart)*512, 0); if ((unsigned long long)read(afd, abuf, len) != len) fail("read first from array failed"); - if (memcmp(bbuf, abuf, len) != 0) { -#if 0 - int i; - printf("offset=%llu len=%llu\n", - (unsigned long long)__le64_to_cpu(bsb2.arraystart)*512, len); - for (i=0; iupdate_tail = &st->updates; - else { + if (mdmon_running(container) == false) { pr_err("No mdmon found. Grow cannot continue.\n"); ret_val = 1; goto Grow_continue_command_exit; diff --git a/Incremental.c b/Incremental.c index 30c07c0..83db071 100644 --- a/Incremental.c +++ b/Incremental.c @@ -833,6 +833,54 @@ container_members_max_degradation(struct map_ent *map, struct map_ent *me) return max_degraded; } +/** + * incremental_external_test_spare_criteria() - helper to test spare criteria. + * @st: supertype, must be not NULL, it is duplicated here. + * @container_devnm: devnm of the container. + * @disk_fd: file descriptor of device to tested. 
+ * @verbose: verbose flag. + * + * The function is used on new drive verification path to check if it can be added to external + * container. To test spare criteria, metadata must be loaded. It duplicates super to not mess in + * original one. + * Function is executed if superblock supports get_spare_criteria(), otherwise success is returned. + */ +mdadm_status_t incremental_external_test_spare_criteria(struct supertype *st, char *container_devnm, + int disk_fd, int verbose) +{ + mdadm_status_t rv = MDADM_STATUS_ERROR; + char container_devname[PATH_MAX]; + struct spare_criteria sc = {0}; + struct supertype *dup; + + if (!st->ss->get_spare_criteria) + return MDADM_STATUS_SUCCESS; + + dup = dup_super(st); + snprintf(container_devname, PATH_MAX, "/dev/%s", container_devnm); + + if (dup->ss->get_spare_criteria(dup, container_devname, &sc) != 0) { + if (verbose > 1) + pr_err("Failed to get spare criteria for %s\n", container_devname); + goto out; + } + + if (!disk_fd_matches_criteria(dup, disk_fd, &sc)) { + if (verbose > 1) + pr_err("Disk does not match spare criteria for %s\n", container_devname); + goto out; + } + + rv = MDADM_STATUS_SUCCESS; + +out: + dev_policy_free(sc.pols); + dup->ss->free_super(dup); + free(dup); + + return rv; +} + static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, struct map_ent *target, int bare, struct supertype *st, int verbose) @@ -873,8 +921,7 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, struct supertype *st2; struct domainlist *dl = NULL; struct mdinfo *sra; - unsigned long long devsize, freesize = 0; - struct spare_criteria sc = {0, 0}; + unsigned long long freesize = 0; if (is_subarray(mp->metadata)) continue; @@ -925,34 +972,19 @@ static int array_try_spare(char *devname, int *dfdp, struct dev_policy *pol, if (sra->array.failed_disks == -1) sra->array.failed_disks = container_members_max_degradation(map, mp); - get_dev_size(dfd, NULL, &devsize); if (sra->component_size == 0) 
{ - /* true for containers, here we must read superblock - * to obtain minimum spare size */ - struct supertype *st3 = dup_super(st2); - int mdfd = open_dev(mp->devnm); - if (mdfd < 0) { - free(st3); + /* true for containers */ + if (incremental_external_test_spare_criteria(st2, mp->devnm, dfd, verbose)) goto next; - } - if (st3->ss->load_container && - !st3->ss->load_container(st3, mdfd, mp->path)) { - if (st3->ss->get_spare_criteria) - st3->ss->get_spare_criteria(st3, &sc); - st3->ss->free_super(st3); - } - free(st3); - close(mdfd); } - if ((sra->component_size > 0 && - st2->ss->validate_geometry(st2, sra->array.level, sra->array.layout, + + if (sra->component_size > 0 && + st2->ss->validate_geometry(st2, sra->array.level, sra->array.layout, sra->array.raid_disks, &sra->array.chunk_size, sra->component_size, sra->devs ? sra->devs->data_offset : INVALID_SECTORS, devname, &freesize, sra->consistency_policy, - 0) && - freesize < sra->component_size) || - (sra->component_size == 0 && devsize < sc.min_size)) { + 0) && freesize < sra->component_size) { if (verbose > 1) pr_err("not adding %s to %s as it is too small\n", devname, mp->path); diff --git a/MAINTAINERS.md b/MAINTAINERS.md new file mode 100644 index 0000000..9c79ba8 --- /dev/null +++ b/MAINTAINERS.md @@ -0,0 +1,44 @@ +# Maintainer tools + +Useful tools used in daily routines: +- [checkpatch](https://docs.kernel.org/dev-tools/checkpatch.html) +- [kup](https://korg.docs.kernel.org/kup.html) +- [Auto-publishing](https://korg.docs.kernel.org/kup.html#auto-publishing-with-git-archive-signer) +- [b4](https://b4.docs.kernel.org/en/latest/) + +# Checklist before applying patch + +We don't have CI testing yet, so all those steps must be performed manually: +- Style check with [checkpatch](https://docs.kernel.org/dev-tools/checkpatch.html): + + This is the current code style follows. We are not strict to all rules. It must be run + by **checkpatch --no-tree**, see README.md. 
+ +- [Commit style](https://www.kernel.org/doc/html/v4.10/process/submitting-patches.html): + + It doesn't need to be followed as strictly as in the kernel, but changes should be logically + separated. Submitter should at least mention "It is used in next patches" if unused + externs/files are added in patch. We love: *Reported-by:*, *Suggested-by:*, *Fixes:* tags. + +- Compilation, ideally on various gcc versions. +- Mdadm test suite execution. +- Consider requesting new tests from submitter, especially for new functionalities. +- Ensure that maintainer *sign-off* is added, before pushing. + +# Making a release + +Assuming that maintainer is certain that release is safe, the following steps must be done: + +- Update version strings in release commit, please refer to previous releases for examples. + +- Create GPG signed tag and push it to repo. Use same format as was used previously, prefixed by + **mdadm-**, e.g. **mdadm-3.1.2**, **mdadm-4.1**. + +- [Auto-publishing](https://korg.docs.kernel.org/kup.html#auto-publishing-with-git-archive-signer): + + Adapt the script to our release tag model. When ready, push signed note to repository. If it is done + correctly, then *(sig)* is added to the package automatically generated by kernel.org automation. + There is no need to upload archive manually. + +- Update CHANGELOG.md. +- Write "ANNOUNCE" mail to linux-raid@vger.kernel.org to notify community. 
diff --git a/Makefile b/Makefile index cbdba49..7c221a8 100644 --- a/Makefile +++ b/Makefile @@ -170,7 +170,7 @@ OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o uuid.o util.o maps.o lib.o u mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \ super-mbr.o super-gpt.o \ restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o xmalloc.o \ - platform-intel.o probe_roms.o crc32c.o + platform-intel.o probe_roms.o crc32c.o drive_encryption.o CHECK_OBJS = restripe.o uuid.o sysfs.o maps.o lib.o xmalloc.o dlink.o @@ -183,7 +183,7 @@ MON_OBJS = mdmon.o monitor.o managemon.o uuid.o util.o maps.o mdstat.o sysfs.o c Kill.o sg_io.o dlink.o ReadMe.o super-intel.o \ super-mbr.o super-gpt.o \ super-ddf.o sha1.o crc32.o msg.o bitmap.o xmalloc.o \ - platform-intel.o probe_roms.o crc32c.o + platform-intel.o probe_roms.o crc32c.o drive_encryption.o MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS)) diff --git a/Manage.c b/Manage.c index 30302ac..96e5ee5 100644 --- a/Manage.c +++ b/Manage.c @@ -178,7 +178,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry) struct map_ent *map = NULL; struct mdinfo *mdi; char devnm[32]; - char container[32]; + char container[MD_NAME_MAX] = {0}; int err; int count; char buf[SYSFS_MAX_BUF_SIZE]; @@ -192,15 +192,9 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry) * to stop is probably a bad idea. */ mdi = sysfs_read(fd, NULL, GET_LEVEL|GET_COMPONENT|GET_VERSION); - if (mdi && is_subarray(mdi->text_version)) { - char *sl; - strncpy(container, mdi->text_version+1, sizeof(container)); - container[sizeof(container)-1] = 0; - sl = strchr(container, '/'); - if (sl) - *sl = 0; - } else - container[0] = 0; + if (mdi && is_subarray(mdi->text_version)) + sysfs_get_container_devnm(mdi, container); + close(fd); count = 5; while (((fd = ((devname[0] == '/') @@ -695,6 +689,99 @@ skip_re_add: return 0; } +/** + * manage_add_external() - Add disk to external container. 
+ * @st: external supertype pointer, must not be NULL, superblock is released here. + * @fd: container file descriptor, must not have O_EXCL mode. + * @disk_fd: device to add file descriptor. + * @disk_name: name of the device to add. + * @disc: disk info. + * + * Superblock is released here because any open fd with O_EXCL will block sysfs_add_disk(). + */ +mdadm_status_t manage_add_external(struct supertype *st, int fd, char *disk_name, + mdu_disk_info_t *disc) +{ + mdadm_status_t rv = MDADM_STATUS_ERROR; + char container_devpath[MD_NAME_MAX]; + struct dev_policy *pols = NULL; + struct mdinfo new_mdi; + struct mdinfo *sra = NULL; + int container_fd; + int disk_fd = -1; + + snprintf(container_devpath, MD_NAME_MAX, "%s", fd2devnm(fd)); + + container_fd = open_dev_excl(container_devpath); + if (!is_fd_valid(container_fd)) { + pr_err("Failed to get exclusive access to container %s\n", container_devpath); + return MDADM_STATUS_ERROR; + } + + /* Check if metadata handler is able to accept the drive */ + if (!st->ss->validate_geometry(st, LEVEL_CONTAINER, 0, 1, NULL, 0, 0, disk_name, NULL, + 0, 1)) + goto out; + + if (mddev_test_and_add_drive_policies(st, &pols, container_fd, 1)) + goto out; + + Kill(disk_name, NULL, 0, -1, 0); + + disk_fd = dev_open(disk_name, O_RDWR | O_EXCL | O_DIRECT); + if (!is_fd_valid(disk_fd)) { + pr_err("Failed to exclusively open %s\n", disk_name); + goto out; + } + + if (drive_test_and_add_policies(st, &pols, disk_fd, 1)) + goto out; + + if (st->ss->add_to_super(st, disc, disk_fd, disk_name, INVALID_SECTORS)) + goto out; + + if (!mdmon_running(st->container_devnm)) + st->ss->sync_metadata(st); + + sra = sysfs_read(container_fd, NULL, 0); + if (!sra) { + pr_err("Failed to read sysfs for %s\n", disk_name); + goto out; + } + + sra->array.level = LEVEL_CONTAINER; + /* Need to set data_offset and component_size */ + st->ss->getinfo_super(st, &new_mdi, NULL); + new_mdi.disk.major = disc->major; + new_mdi.disk.minor = disc->minor; + 
new_mdi.recovery_start = 0; + + st->ss->free_super(st); + + if (sysfs_add_disk(sra, &new_mdi, 0) != 0) { + pr_err("Failed to add %s to container %s\n", disk_name, container_devpath); + goto out; + } + ping_monitor(container_devpath); + rv = MDADM_STATUS_SUCCESS; + +out: + close(container_fd); + dev_policy_free(pols); + + if (sra) + sysfs_free(sra); + + if (rv != MDADM_STATUS_SUCCESS && is_fd_valid(disk_fd)) + /* Metadata handler records this descriptor, so release it only on failure. */ + close(disk_fd); + + if (st->sb) + st->ss->free_super(st); + + return rv; +} + int Manage_add(int fd, int tfd, struct mddev_dev *dv, struct supertype *tst, mdu_array_info_t *array, int force, int verbose, char *devname, @@ -794,25 +881,23 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, * simply re-add it. */ - if (array->not_persistent == 0) { + if (array->not_persistent == 0 && dv->disposition != 'S') { + int rv = 0; + dev_st = dup_super(tst); dev_st->ss->load_super(dev_st, tfd, NULL); - if (dev_st->sb && dv->disposition != 'S') { - int rv; - rv = attempt_re_add(fd, tfd, dv, dev_st, tst, - rdev, update, devname, - verbose, array); - dev_st->ss->free_super(dev_st); - if (rv) { - free(dev_st); - return rv; - } - } - if (dev_st) { + if (dev_st->sb) { + rv = attempt_re_add(fd, tfd, dv, dev_st, tst, rdev, update, + devname, verbose, array); + dev_st->ss->free_super(dev_st); - free(dev_st); } + + free(dev_st); + + if (rv) + return rv; } if (dv->disposition == 'M') { if (verbose > 0) @@ -968,68 +1053,8 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, if (dv->failfast == FlagSet) disc.state |= (1 << MD_DISK_FAILFAST); if (tst->ss->external) { - /* add a disk - * to an external metadata container */ - struct mdinfo new_mdi; - struct mdinfo *sra; - int container_fd; - char devnm[32]; - int dfd; - - strcpy(devnm, fd2devnm(fd)); - - container_fd = open_dev_excl(devnm); - if (container_fd < 0) { - pr_err("add failed for %s: could not get exclusive access to container\n", - 
dv->devname); - tst->ss->free_super(tst); + if (manage_add_external(tst, fd, dv->devname, &disc) != MDADM_STATUS_SUCCESS) goto unlock; - } - - /* Check if metadata handler is able to accept the drive */ - if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL, - 0, 0, dv->devname, NULL, 0, 1)) { - close(container_fd); - goto unlock; - } - - Kill(dv->devname, NULL, 0, -1, 0); - dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT); - if (tst->ss->add_to_super(tst, &disc, dfd, - dv->devname, INVALID_SECTORS)) { - close(dfd); - close(container_fd); - goto unlock; - } - if (!mdmon_running(tst->container_devnm)) - tst->ss->sync_metadata(tst); - - sra = sysfs_read(container_fd, NULL, 0); - if (!sra) { - pr_err("add failed for %s: sysfs_read failed\n", - dv->devname); - close(container_fd); - tst->ss->free_super(tst); - goto unlock; - } - sra->array.level = LEVEL_CONTAINER; - /* Need to set data_offset and component_size */ - tst->ss->getinfo_super(tst, &new_mdi, NULL); - new_mdi.disk.major = disc.major; - new_mdi.disk.minor = disc.minor; - new_mdi.recovery_start = 0; - /* Make sure fds are closed as they are O_EXCL which - * would block add_disk */ - tst->ss->free_super(tst); - if (sysfs_add_disk(sra, &new_mdi, 0) != 0) { - pr_err("add new device to external metadata failed for %s\n", dv->devname); - close(container_fd); - sysfs_free(sra); - goto unlock; - } - ping_monitor(devnm); - sysfs_free(sra); - close(container_fd); } else { tst->ss->free_super(tst); if (ioctl(fd, ADD_NEW_DISK, &disc)) { diff --git a/Monitor.c b/Monitor.c index 824a69f..9b016bc 100644 --- a/Monitor.c +++ b/Monitor.c @@ -451,17 +451,19 @@ static int check_one_sharer(int scan) return 2; } - if (access(AUTOREBUILD_PID_PATH, F_OK) != 0) - return 0; + fp = fopen(AUTOREBUILD_PID_PATH, "r"); + if (!fp) { + /* PID file does not exist */ + if (errno == ENOENT) + return 0; - if (!is_file(AUTOREBUILD_PID_PATH)) { - pr_err("%s is not a regular file.\n", AUTOREBUILD_PID_PATH); + pr_err("Cannot open %s 
file.\n", AUTOREBUILD_PID_PATH); return 2; } - fp = fopen(AUTOREBUILD_PID_PATH, "r"); - if (!fp) { - pr_err("Cannot open %s file.\n", AUTOREBUILD_PID_PATH); + if (!is_file(AUTOREBUILD_PID_PATH)) { + pr_err("%s is not a regular file.\n", AUTOREBUILD_PID_PATH); + fclose(fp); return 2; } @@ -1006,34 +1008,6 @@ static int add_new_arrays(struct mdstat_ent *mdstat, struct state **statelist) return new_found; } -static int get_required_spare_criteria(struct state *st, - struct spare_criteria *sc) -{ - int fd; - - if (!st->metadata || !st->metadata->ss->get_spare_criteria) { - sc->min_size = 0; - sc->sector_size = 0; - return 0; - } - - fd = open(st->devname, O_RDONLY); - if (fd < 0) - return 1; - if (st->metadata->ss->external) - st->metadata->ss->load_container(st->metadata, fd, st->devname); - else - st->metadata->ss->load_super(st->metadata, fd, st->devname); - close(fd); - if (!st->metadata->sb) - return 1; - - st->metadata->ss->get_spare_criteria(st->metadata, sc); - st->metadata->ss->free_super(st->metadata); - - return 0; -} - static int check_donor(struct state *from, struct state *to) { struct state *sub; @@ -1068,22 +1042,12 @@ static dev_t choose_spare(struct state *from, struct state *to, for (d = from->raid; !dev && d < MAX_DISKS; d++) { if (from->devid[d] > 0 && from->devstate[d] == 0) { struct dev_policy *pol; - unsigned long long dev_size; - unsigned int dev_sector_size; if (to->metadata->ss->external && test_partition_from_id(from->devid[d])) continue; - if (sc->min_size && - dev_size_from_id(from->devid[d], &dev_size) && - dev_size < sc->min_size) - continue; - - if (sc->sector_size && - dev_sector_size_from_id(from->devid[d], - &dev_sector_size) && - sc->sector_size != dev_sector_size) + if (devid_matches_criteria(to->metadata, from->devid[d], sc) == false) continue; pol = devid_policy(from->devid[d]); @@ -1168,12 +1132,12 @@ static void try_spare_migration(struct state *statelist) { struct state *from; struct state *st; - struct spare_criteria sc; 
link_containers_with_subarrays(statelist); for (st = statelist; st; st = st->next) if (st->active < st->raid && st->spare == 0 && !st->err) { struct domainlist *domlist = NULL; + struct spare_criteria sc = {0}; int d; struct state *to = st; @@ -1186,8 +1150,11 @@ static void try_spare_migration(struct state *statelist) /* member of a container */ to = to->parent; - if (get_required_spare_criteria(to, &sc)) - continue; + if (to->metadata->ss->get_spare_criteria) + if (to->metadata->ss->get_spare_criteria(to->metadata, to->devname, + &sc)) + continue; + if (to->metadata->ss->external) { /* We must make sure there is * no suitable spare in container already. @@ -1228,6 +1195,7 @@ static void try_spare_migration(struct state *statelist) } } domain_free(domlist); + dev_policy_free(sc.pols); } } diff --git a/README.initramfs b/README.initramfs deleted file mode 100644 index c5fa668..0000000 --- a/README.initramfs +++ /dev/null @@ -1,122 +0,0 @@ -Assembling md arrays at boot time. ---------------------------------- -December 2005 - -These notes apply to 2.6 kernels only and, in some cases, -to 2.6.15 or later. - -Md arrays can be assembled at boot time using the 'autodetect' functionality -which is triggered by storing components of an array in partitions of type -'fd' - Linux Raid Autodetect. -They can also be assembled by specifying the component devices in a -kernel parameter such as - md=0,/dev/sda,/dev/sdb -In this case, /dev/md0 will be assembled (because of the 0) from the listed -devices. - -These mechanisms, while useful, do not provide complete functionality -and are unlikely to be extended. The preferred way to assemble md -arrays at boot time is using 'mdadm'. To assemble an array which -contains the root filesystem, mdadm needs to be run before that -filesystem is mounted, and so needs to be run from an initial-ram-fs. -It is how this can work that is the primary focus of this document. 
- -It should be noted up front that only the array containing the root -filesystem should be assembled from the initramfs. Any other arrays -should be assembled under the control of files on the main filesystem -as this enhanced flexibility and maintainability. - -A minimal initramfs for assembling md arrays can be created using 3 -files and one directory. These are: - -/bin Directory -/bin/mdadm statically linked mdadm binary -/bin/busybox statically linked busybox binary -/bin/sh hard link to /bin/busybox -/init a shell script which call mdadm appropriately. - -An example init script is: - -============================================== -#!/bin/sh - -echo 'Auto-assembling boot md array' -mkdir /proc -mount -t proc proc /proc -if [ -n "$rootuuid" ] -then arg=--uuid=$rootuuid -elif [ -n "$mdminor" ] -then arg=--super-minor=$mdminor -else arg=--super-minor=0 -fi -echo "Using $arg" -mdadm -Acpartitions $arg --auto=part /dev/mda -cd / -mount /dev/mda1 /root || mount /dev/mda /root -umount /proc -cd /root -exec chroot . /sbin/init < /dev/console > /dev/console 2>&1 -============================================= - -This could certainly be extended, or merged into a larger init script. -Though tested and in production use, it is not presented here as -"The Right Way" to do it, but as a useful example. -Some key points are: - - /proc needs to be mounted so that /proc/partitions can be accessed - by mdadm, and so that /proc/filesystems can be accessed by mount. - - The uuid of the array can be passed in as a kernel parameter - (rootuuid). As the kernel doesn't use this value, it is made available - in the environment for /init - - If no uuid is given, we default to md0, (--super-minor=0) which is a - commonly used to store the root filesystem. This may not work in - all situations. - - We assemble the array as a partitionable array (/dev/mda) even if we - end up using the whole array. 
There is no cost in using the partitionable - interface, and in this context it is simpler. - - We try mounting both /dev/mda1 and /dev/mda as they are the most like - part of the array to contain the root filesystem. - - The --auto flag is given to mdadm so that it will create /dev/md* - files automatically. This is needed as /dev will not contain - and md files, and udev will not create them (as udev only created device - files after the device exists, and mdadm need the device file to create - the device). Note that the created md files may not exist in /dev - of the mounted root filesystem. This needs to be deal with separately - from mdadm - possibly using udev. - - We do not need to create device files for the components which will - be assembled into /dev/mda. mdadm finds the major/minor numbers from - /proc/partitions and creates a temporary /dev file if one doesn't already - exist. - -The script "mkinitramfs" which is included with the mdadm distribution -can be used to create a minimal initramfs. It creates a file called -'init.cpio.gz' which can be specified as an 'initrd' to lilo or grub -(or whatever boot loader is being used). - - - - -Resume from an md array ------------------------ - -If you want to make use of the suspend-to-disk/resume functionality in Linux, -and want to have swap on an md array, you will need to assemble the array -before resume is possible. -However, because the array is active in the resumed image, you do not want -anything written to any drives during the resume process, such as superblock -updates or array resync. - -This can be achieved in 2.6.15-rc1 and later kernels using the -'start_readonly' module parameter. -Simply include the command - echo 1 > /sys/module/md_mod/parameters/start_ro -before assembling the array with 'mdadm'. -You can then echo - 9:0 -or whatever is appropriate to /sys/power/resume to trigger the resume. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..64f2ece --- /dev/null +++ b/README.md @@ -0,0 +1,83 @@ +**mdadm** is a utility used to create and manage **software RAID** devices implemented through +**Multiple devices driver (MD)** in the kernel. It supports the following RAID metadata formats: + +* [Linux native RAID](https://raid.wiki.kernel.org/index.php/RAID_superblock_formats): + + Known as **native** or **native RAID**. First and default metadata format. Metadata management + is implemented in **MD driver**. + +* Matrix Storage Manager Support (no reference, metadata format documentation is proprietary). + + Known as **IMSM**. Metadata format developed and maintained by **Intel®** as a part of **VROC** + solution. There are some functional differences between **native** and **imsm**. The most + important difference is that the metadata is managed from userspace. + + **CAUTION:** **imsm** is compatible with **Intel RST**, however it is not officially supported. + You are using it at your own risk. + +* [Common RAID DDF Specification Revision](https://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf) + + **IMPORTANT:** DDF is in **maintenance only** mode. There is no active development around it. + Please do not use it in new solutions. + +# How to Contribute + + **mdadm** is hosted on [kernel.org](https://kernel.org/). You can access repository +[here](https://git.kernel.org/pub/scm/utils/mdadm/mdadm.git). + +It is maintained similarly to the kernel, using a *mailing list*. Patches must be sent through email. +Please familiarize yourself with the general kernel +[submitting patches](https://www.kernel.org/doc/html/v4.17/process/submitting-patches.html) +documentation. Formatting, tags and commit message guidelines apply to **mdadm**. + +## Sending patches step-by-step + +To maximize the chance of patches being taken, follow these instructions when submitting: + +1. 
Create possibly logically separated commits and generate patches: + + Use ``git format-patch --cover-letter --signoff -v <N>`` to create patches: + * ``--cover-letter`` can be skipped if it is only one patch; + * ``--signoff`` adds sign-off tag; + * ``-v <N>`` indicates review revision number, sender should increment it before resending. + +2. Check style of every patch with kernel + [checkpatch](https://docs.kernel.org/dev-tools/checkpatch.html) script: + + It is important to keep the same coding style, which is why in **mdadm** the + [kernel coding style](https://www.kernel.org/doc/html/v4.10/process/coding-style.html) + is preferred. ``checkpatch --no-tree <patch_file>`` can be used to verify patches. + The following checkpatch issues can be ignored: + - New typedefs. + - comparing with *True/False*. + - kernel *MAINTAINERS* file warning. + - *extern* keyword in headers. + +3. Send patches using ``git send-email --to=linux-raid@vger.kernel.org (...)`` + +# Maintainers + +It is good practice to add **mdadm maintainers** to recipients for patches: + +- Jes Sorensen ; +- Mariusz Tkaczyk ; + +Adding **MD maintainers** could be reasonable, especially if patches may affect MD driver: + +- Song Liu ; +- Yu Kuai ; + +# Reviewers + +**mdadm** utility is not part of kernel tree, so there is no certified *Reviewers* list. Everyone +can comment on the mailing list, the final decision (and merging) belongs to maintainers. + +# Minimal supported kernel version + +We do not support kernel versions below **v3.10**. Please be aware that maintainers may remove +workarounds and fixes for legacy issues. + +# License + +It is released under the terms of the **GNU General Public License version 2** as published +by the **Free Software Foundation**. diff --git a/TODO b/TODO deleted file mode 100644 index 279d20d..0000000 --- a/TODO +++ /dev/null @@ -1,213 +0,0 @@ - - add 'name' field to metadata type and use it. - - use validate_geometry more - - metadata should be able to check/reject bitmap stuff. 
- -DDF: - Three new metadata types: - ddf - used only to create a container. - ddf-bvd - used to create an array in a container - ddf-svd - used to create a secondary array from bvds. - - Usage: - mdadm -C /dev/ddf1 /dev/sd[abcdef] - mdadm -C /dev/md1 -e ddf /dev/sd[a-f] - mdadm -C /dev/md1 -l container /dev/sd[a-f] - - Each of these create a new ddf container using all those - devices. The name 'ddf*' signals that ddf metadata should be used. - '-e ddf' only supports one level - 'container'. 'container' is only - supported by ddf. - - mdadm -C /dev/md1 -l0 -n4 /dev/ddf1 # or maybe not ??? - mdadm -C /dev/md1 -l1 -n2 /dev/sda /dev/sdb - If exactly one device is given, and it is a container, we select - devices from that container. - If devices are given that are already in use, they must be in use by - a container, and the array is created in the container. - If devices given are bvds, we slip under the hood to make - the svd arrays. - - mdadm -A /dev/ddf ...... - base drives make a container. Anything in that container is started - auto-read-only. - if /dev/ddf is already assembled, we assemble bvds and svds inside it. - - -2005-dec-20 - Want an incremental assembly mode to work nicely with udev. - Core usage would be something like - mdadm --incr-assemble /dev/newdevice - This would - - examine the device to determine uuid etc. - - look for a match in /etc/mdadm.conf, abort if not found - - find that device and collect current contents - - perform an 'assemble' analysis to make sure we have the best set of devices. - - remove or add devices as appropriate - - possibly start the array if it was complete - - Other usages could involve - - specify which array to auto-add to. - This requires an existing array for uuid matching... is there any point? - - - - - -2004-june-02 - * Don't print 'errors' flag, it is meaningless. DONE - * Handle new superblock format - * create device file on demand, particularly partitionable devices. 
DONE - BUT figure a way to create the partition devices. - auto=partN - * Use Event: interface to listen for events. DONE, untested - * Make sure mdadm -As can assemble multi-level RAIDs ok. - * --build to build raid1 or multipath arrays - clean or not ??? - ----------------------------------------------------------------------------- -* mdadm --monitor to monitor failed multipath paths and re-instate them. - -* Maybe make "--help" fit in 80x24 and have a --long-help with more info. DONE - - -* maybe "missing" instead of missing in doco DONE -* possibly wait for resync to start, or even finish while assembling.- NO - -* -Db should have a devices= entry if possible. - DONE -* when assembling multipath arrays, ignore any error indicators. - DONE -* rationalise --monitor usage: - mdadm --monitor - doesn't do as expected. DONE - -* --assemble could have a --update option. - DONE - following word can be: - sparc2.2 - super-minor - -* mdadm /dev/md11, where md11 is raid0 can segfault, particularly when looking in the - [UU_UUU] string ... which doesn't exist ! -It should be more sensible. DONE - -Example: - -from Raimund Sacherer - -mke2fs -m0 -q /dev/ram1 300 -mount -n -t ext2 /dev/ram1 /tmp -echo DEVICE /dev/[sh]* >> /tmp/mdadm.conf -mdadm -Esb /dev/[sh]* 2>/dev/null >> /tmp/mdadm.conf -mdadm -ARsc /tmp/mdadm.conf -umount /tmp - - -?? Allow -S /dev/md? - current complains subsequent not a/d/r - DONE - -* new "Query" mode to subsume --detail and --examine. - --query or -Q, takes a device and tells if it is an MD device, - and also tells in a raid superblock is found. - DONE - -* write mdstat.c to parse /proc/mdstat file - Build list of arrays: name, rebuild-percent - DONE - -* parse /proc/partitions and map major/minor into /dev/* names, - and use that for default DEVICE list ???? - -* --detail --scan to read /proc/mdstat, and then iterate over these, - but assume --brief. --verbose can override - check each subdevice to see if it is in conf_get_devs. - Warn if not. 
- DONE, but don't warn yet... - -* Support multipath ... maybe... - maybe DONE - -* --follow to syslog - -* --follow to move spares around DONE - -* --follow to notice other events: DONE - rebuild started - spare activated - spare removed - spare added - ------------------------------------- -- --examine --scan scans all drives and build an mdadm.conf file DONE - -- check superblock checksum in examine DONE -- report "chunk" or "rounding" depending on raid level DONE -- report "linear" instead of "-1" for raid level DONE -- decode ayout depending on raid level DONE -- --verbose and --force flags. DONE - -- set md_minor, *_disks for Create - DONE -- for create raid5, how to choose between - all working, but not insync - one missing, one spare, insync DONE (--force) -- and for raid1 - some failed drives... (missing) - -- when RUN_ARRAY, make sure *_disks counts are right - -- get --detail to extract extra stuff from superblock, - like uuid DONE -- --detail --brief to give a config file line DONE -- parse config file. DONE -- test... - -- when --assemble --scan, if an underlying device is an md device, - then try to assemble that device first. - - -- mdadm -S /dev/md0 /dev/md1 gives internal error FIXED - -- mdadm --detail --scan print summary of what it can find? DONE - - ---------- -Assemble doesn't add spares. - DONE -Create to allow "missing" name for devices. -Create to accept "--force" for do exactly what is requested -- get Assemble to upgrade devices if force flag. 
-ARRAY lines in config file to have super_minor=n -ARRAY lines in config file to have device=pattern, and only accept - those devices - If UUID given, insist on that - If not, but super_minor given, require all found with that minor - to have same uuid - If only device given, all valid supers on those devices must have - same uuid -allow /dev/mdX as first argument before any options -Possible --dry-run option for create and assemble--force - -Assemble to check that all devices mentioned in superblock - are present. - -New mode: --Monitor (or --Follow) - Periodically check status of all arrays (listed in config file). - Log every event and apparent cause - or differences - Email and alert - or run a program - for important events - Move spares around if necessary. - - An Array line can have a spare-group= field that indicates that - the array shares spares with other arrays with the same - spare-group name. - If an array has a failed and no spares, then check all other - arrays in the spare group. If one has no failures and a spare, - then consider that spare. - Choose the smallest considered spare that is large enough. - If there is one, then hot-remove it from it's home, and - hot-add it to the array in question. - - --mail-to address - --alert-handler program - - Will also extract information from /proc/mdstat if present, - and consider 20% marks in rebuild as events. 
- - Events are: - drive fails - causes mail to be sent - rebuild started - spare activated - spare removed - spare added diff --git a/config.c b/config.c index 44f7dd2..b46d71c 100644 --- a/config.c +++ b/config.c @@ -81,7 +81,7 @@ char DefaultAltConfDir[] = CONFFILE2 ".d"; enum linetype { Devices, Array, Mailaddr, Mailfrom, Program, CreateDev, Homehost, HomeCluster, AutoMode, Policy, PartPolicy, Sysfs, - MonitorDelay, LTEnd }; + MonitorDelay, EncryptionNoVerify, LTEnd }; char *keywords[] = { [Devices] = "devices", [Array] = "array", @@ -96,6 +96,7 @@ char *keywords[] = { [PartPolicy]="part-policy", [Sysfs] = "sysfs", [MonitorDelay] = "monitordelay", + [EncryptionNoVerify] = "ENCRYPTION_NO_VERIFY", [LTEnd] = NULL }; @@ -729,6 +730,19 @@ void monitordelayline(char *line) } } +static bool sata_opal_encryption_no_verify; +void encryption_no_verify_line(char *line) +{ + char *word; + + for (word = dl_next(line); word != line; word = dl_next(word)) { + if (strcasecmp(word, "sata_opal") == 0) + sata_opal_encryption_no_verify = true; + else + pr_err("unrecognised word on ENCRYPTION_NO_VERIFY line: %s\n", word); + } +} + char auto_yes[] = "yes"; char auto_no[] = "no"; char auto_homehost[] = "homehost"; @@ -913,6 +927,9 @@ void conf_file(FILE *f) case MonitorDelay: monitordelayline(line); break; + case EncryptionNoVerify: + encryption_no_verify_line(line); + break; default: pr_err("Unknown keyword %s\n", line); } @@ -1075,6 +1092,12 @@ int conf_get_monitor_delay(void) return monitor_delay; } +bool conf_get_sata_opal_encryption_no_verify(void) +{ + load_conffile(); + return sata_opal_encryption_no_verify; +} + struct createinfo *conf_get_create_info(void) { load_conffile(); diff --git a/documentation/external-reshape-design.txt b/documentation/external-reshape-design.txt new file mode 100644 index 0000000..e4cf4e1 --- /dev/null +++ b/documentation/external-reshape-design.txt @@ -0,0 +1,280 @@ +External Reshape + +1 Problem statement + +External (third-party metadata) reshape 
differs from native-metadata +reshape in three key ways: + +1.1 Format specific constraints + +In the native case reshape is limited by what is implemented in the +generic reshape routine (Grow_reshape()) and what is supported by the +kernel. There are exceptional cases where Grow_reshape() may block +operations when it knows that the kernel implementation is broken, but +otherwise the kernel is relied upon to be the final arbiter of what +reshape operations are supported. + +In the external case the kernel, and the generic checks in +Grow_reshape(), become the super-set of what reshapes are possible. The +metadata format may not support, or have yet to implement a given +reshape type. The implication for Grow_reshape() is that it must query +the metadata handler and effect changes in the metadata before the new +geometry is posted to the kernel. The ->reshape_super method allows +Grow_reshape() to validate the requested operation and post the metadata +update. + +1.2 Scope of reshape + +Native metadata reshape is always performed at the array scope (no +metadata relationship with sibling arrays on the same disks). External +reshape, depending on the format, may not allow the number of member +disks to be changed in a subarray unless the change is simultaneously +applied to all subarrays in the container. For example the imsm format +requires all member disks to be a member of all subarrays, so a 4-disk +raid5 in a container that also houses a 4-disk raid10 array could not be +reshaped to 5 disks as the imsm format does not support a 5-disk raid10 +representation. This requires the ->reshape_super method to check the +contents of the array and ask the user to run the reshape at container +scope (if all subarrays are agreeable to the change), or report an +error in the case where one subarray cannot support the change. + +1.3 Monitoring / checkpointing + +Reshape, unlike rebuild/resync, requires strict checkpointing to survive +interrupted reshape operations. 
For example when expanding a raid5 +array the first few stripes of the array will be overwritten in a +destructive manner. When restarting the reshape process we need to know +the exact location of the last successfully written stripe, and we need +to restore the data in any partially overwritten stripe. Native +metadata stores this backup data in the unused portion of spares that +are being promoted to array members, or in an external backup file +(located on a non-involved block device). + +The kernel is in charge of recording checkpoints of reshape progress, +but mdadm is delegated the task of managing the backup space which +involves: +1/ Identifying what data will be overwritten in the next unit of reshape + operation +2/ Suspending access to that region so that a snapshot of the data can + be transferred to the backup space. +3/ Allowing the kernel to reshape the saved region and setting the + boundary for the next backup. + +In the external reshape case we want to preserve this mdadm +'reshape-manager' arrangement, but have a third actor, mdmon, to +consider. It is tempting to give the role of managing reshape to mdmon, +but that is counter to its role as a monitor, and conflicts with the +existing capabilities and role of mdadm to manage the progress of +reshape. For clarity the external reshape implementation maintains the +role of mdmon as a (mostly) passive recorder of raid events, and mdadm +treats it as it would the kernel in the native reshape case (modulo +needing to send explicit metadata update messages and checking that +mdmon took the expected action). + +External reshape can use the generic md backup file as a fallback, but in the +optimal/firmware-compatible case the reshape-manager will use the metadata +specific areas for managing reshape. The implementation also needs to spawn a +reshape-manager per subarray when the reshape is being carried out at the +container level. For these two reasons the ->manage_reshape() method is +introduced. 
This method in addition to base tasks mentioned above: +1/ Processes each subarray one at a time in series - where appropriate. +2/ Uses either generic routines in Grow.c for md-style backup file + support, or uses the metadata-format specific location for storing + recovery data. +This aims to avoid a "midlayer mistake"[1] and lets the metadata handler +optionally take advantage of generic infrastructure in Grow.c + +2 Details for specific reshape requests + +There are quite a few moving pieces spread out across md, mdadm, and mdmon for +the support of external reshape, and there are several different types of +reshape that need to be comprehended by the implementation. A rundown of +these details follows. + +2.0 General provisions: + +Obtain an exclusive open on the container to make sure we are not +running concurrently with a Create() event. + +2.1 Freezing sync_action + + Before making any attempt at a reshape we 'freeze' every array in + the container to ensure no spare assignment or recovery happens. + This involves writing 'frozen' to sync_action and changing the '/' + after 'external:' in metadata_version to a '-'. mdmon knows that + this means not to perform any management. + + Before doing this we check that all sync_actions are 'idle', which + is racy but still useful. + Afterwards we check that all member arrays have no spares + or partial spares (recovery_start != 'none') which would indicate a + race. If they do, we unfreeze again. + + Once this completes we know all the arrays are stable. They may + still have failed devices as devices can fail at any time. However + we treat those like failures that happen during the reshape.
+ +2.2 Reshape size + + 1/ mdadm::Grow_reshape(): checks if mdmon is running and optionally + initializes st->update_tail + 2/ mdadm::Grow_reshape() calls ->reshape_super() to check that the size change + is allowed (being performed at subarray scope / enough room) prepares a + metadata update + 3/ mdadm::Grow_reshape(): flushes the metadata update (via + flush_metadata_update(), or ->sync_metadata()) + 4/ mdadm::Grow_reshape(): post the new size to the kernel + + +2.3 Reshape level (simple-takeover) + +"simple-takeover" implies the level change can be satisfied without touching +sync_action + + 1/ mdadm::Grow_reshape(): checks if mdmon is running and optionally + initializes st->update_tail + 2/ mdadm::Grow_reshape() calls ->reshape_super() to check that the level change + is allowed (being performed at subarray scope) prepares a + metadata update + 2a/ raid10 --> raid0: degrade all mirror legs prior to calling + ->reshape_super + 3/ mdadm::Grow_reshape(): flushes the metadata update (via + flush_metadata_update(), or ->sync_metadata()) + 4/ mdadm::Grow_reshape(): post the new level to the kernel + +2.4 Reshape chunk, layout + +2.5 Reshape raid disks (grow) + + 1/ mdadm::Grow_reshape(): unconditionally initializes st->update_tail + because only redundant raid levels can modify the number of raid disks + 2/ mdadm::Grow_reshape(): calls ->reshape_super() to check that the level + change is allowed (being performed at proper scope / permissible + geometry / proper spares available in the container), chooses + the spares to use, and prepares a metadata update. + 3/ mdadm::Grow_reshape(): Converts each subarray in the container to the + raid level that can perform the reshape and starts mdmon. + 4/ mdadm::Grow_reshape(): Pushes the update to mdmon. + 5/ mdadm::Grow_reshape(): uses container_content to find details of + the spares and passes them to the kernel. 
+ 6/ mdadm::Grow_reshape(): gives raid_disks update to the kernel, + sets sync_max, sync_min, suspend_lo, suspend_hi all to zero, + and starts the reshape by writing 'reshape' to sync_action. + 7/ mdmon::monitor notices the sync_action change and tells + managemon to check for new devices. managemon notices the new + devices, opens relevant sysfs file, and passes them all to + monitor. + 8/ mdadm::Grow_reshape() calls ->manage_reshape to oversee the + rest of the reshape. + + 9/ mdadm::->manage_reshape(): saves data that will be overwritten by + the kernel to either the backup file or the metadata specific location, + advances sync_max, waits for reshape, ping mdmon, repeat. + Meanwhile mdmon::read_and_act(): records checkpoints. + Specifically. + + 9a/ if the 'next' stripe to be reshaped will over-write + itself during reshape then: + 9a.1/ increase suspend_hi to cover a suitable number of + stripes. + 9a.2/ backup those stripes safely. + 9a.3/ advance sync_max to allow those stripes to be backed up + 9a.4/ when sync_completed indicates that those stripes have + been reshaped, manage_reshape must ping_manager + 9a.5/ when mdmon notices that sync_completed has been updated, + it records the new checkpoint in the metadata + 9a.6/ after the ping_manager, manage_reshape will increase + suspend_lo to allow access to those stripes again + + 9b/ if the 'next' stripe to be reshaped will over-write unused + space during reshape then we apply same process as above, + except that there is no need to back anything up. + Note that we *do* need to keep suspend_hi progressing as + it is not safe to write to the area-under-reshape. For + kernel-managed-metadata this protection is provided by + ->reshape_safe, but that does not protect us in the case + of user-space-managed-metadata. 
+ + 10/ mdadm::->manage_reshape(): Once reshape completes changes the raid + level back to the nominal raid level (if necessary) + + FIXME: native metadata does not have the capability to record the original + raid level in reshape-restart case because the kernel always records current + raid level to the metadata, whereas external metadata can masquerade at an + alternate level based on the reshape state. + +2.6 Reshape raid disks (shrink) + +3 Interaction with metadata handler. + + The following calls are made into the metadata handler to assist + with initiating and monitoring a 'reshape'. + + 1/ ->reshape_super is called quite early (after only minimal + checks) to make sure that the metadata can record the new shape + and any necessary transitions. It may be passed a 'container' + or an individual array within a container, and it should notice + the difference and act accordingly. + When a reshape is requested against a container it is expected + that it should be applied to every array in the container, + however it is up to the metadata handler to determine final + policy. + + If the reshape is supportable, the internal copy of the metadata + should be updated, and a metadata update suitable for sending + to mdmon should be queued. + + If the reshape will involve converting spares into array members, + this must be recorded in the metadata too. + + 2/ ->container_content will be called to find out the new state + of the array, or all arrays in the container. Any newly + added devices (with state==0 and raid_disk >= 0) will be added + to the array as spares with the relevant slot number. + + It is likely that the info returned by ->container_content will + have ->reshape_active set, ->reshape_progress set to e.g. 0, and + new_* set appropriately. mdadm will use this information to + cause the correct reshape to start at an appropriate time. + + 3/ ->set_array_state will be called by mdmon when reshape has + started and again periodically as it progresses.
This should + record the ->last_checkpoint as the point where reshape has + progressed to. When the reshape finishes this will be called + again and it should notice that ->curr_action is no longer + 'reshape' and so should record that the reshape has finished + providing 'last_checkpoint' has progressed suitably. + + 4/ ->manage_reshape will be called once the reshape has been set + up in the kernel but before sync_max has been moved from 0, so + no actual reshape will have happened. + + ->manage_reshape should call progress_reshape() to allow the + reshape to progress, and should back-up any data as indicated + by the return value. See the documentation of that function + for more details. + ->manage_reshape will be called multiple times when a + container is being reshaped, once for each member array in + the container. + + + The progress of the metadata is as follows: + 1/ mdadm sends a metadata update to mdmon which marks the array + as undergoing a reshape. This is set up by + ->reshape_super and applied by ->process_update + For container-wide reshape, this happens once for the whole + container. + 2/ mdmon notices progress via the sysfs files and calls + ->set_array_state to update the state periodically + For container-wide reshape, this happens repeatedly for + one array, then repeatedly for the next, etc. + 3/ mdmon notices when reshape has finished and calls + ->set_array_state to record that the reshape is complete. + For container-wide reshape, this happens once for each + member array. + + + +... + +[1]: Linux kernel design patterns - part 3, Neil Brown https://lwn.net/Articles/336262/ diff --git a/documentation/mdadm.conf-example b/documentation/mdadm.conf-example new file mode 100644 index 0000000..35a75d1 --- /dev/null +++ b/documentation/mdadm.conf-example @@ -0,0 +1,65 @@ +# mdadm configuration file +# +# mdadm will function properly without the use of a configuration file, +# but this file is useful for keeping track of arrays and member disks.
+# In general, a mdadm.conf file is created, and updated, after arrays +# are created. This is the opposite behavior of /etc/raidtab which is +# created prior to array construction. +# +# +# the config file takes two types of lines: +# +# DEVICE lines specify a list of devices where to look for +# potential member disks +# +# ARRAY lines specify information about how to identify arrays +# so that they can be activated +# +# You can have more than one device line and use wild cards. The first +# example includes the first partition of SCSI disks /dev/sdb, +# /dev/sdc, /dev/sdd, /dev/sdj, /dev/sdk, and /dev/sdl. The second +# line looks for array slices on IDE disks. +# +#DEVICE /dev/sd[bcdjkl]1 +#DEVICE /dev/hda1 /dev/hdb1 +# +# If you mount devfs on /dev, then a suitable way to list all devices is: +#DEVICE /dev/discs/*/* +# +# +# The AUTO line can control which arrays get assembled by auto-assembly, +# meaning either "mdadm -As" when there are no 'ARRAY' lines in this file, +# or "mdadm --incremental" when the array found is not listed in this file. +# By default, all arrays that are found are assembled. +# If you want to ignore all DDF arrays (maybe they are managed by dmraid), +# and only assemble 1.x arrays which are marked for 'this' homehost, +# but assemble all others, then use +#AUTO -ddf homehost -1.x +all +# +# ARRAY lines specify an array to assemble and a method of identification. +# Arrays can currently be identified by using a UUID, superblock minor number, +# or a listing of devices. +# +# super-minor is usually the minor number of the metadevice +# UUID is the Universally Unique Identifier for the array +# Each can be obtained using +# +# mdadm -D +# +#ARRAY /dev/md0 UUID=3aaa0122:29827cfa:5331ad66:ca767371 +#ARRAY /dev/md1 super-minor=1 +#ARRAY /dev/md2 devices=/dev/hda1,/dev/hdb1 +# +# ARRAY lines can also specify a "spare-group" for each array.
mdadm --monitor +# will then move a spare between arrays in a spare-group if one array has a failed +# drive but no spare +#ARRAY /dev/md4 uuid=b23f3c6d:aec43a9f:fd65db85:369432df spare-group=group1 +#ARRAY /dev/md5 uuid=19464854:03f71b1b:e0df2edd:246cc977 spare-group=group1 +# +# When used in --follow (aka --monitor) mode, mdadm needs a +# mail address and/or a program. This can be given with "mailaddr" +# and "program" lines so that monitoring can be started using +# mdadm --follow --scan & echo $! > /run/mdadm/mon.pid +# If the lines are not found, mdadm will exit quietly +#MAILADDR root@mydomain.tld +#PROGRAM /usr/sbin/handle-mdadm-events diff --git a/documentation/mdmon-design.txt b/documentation/mdmon-design.txt new file mode 100644 index 0000000..f09184a --- /dev/null +++ b/documentation/mdmon-design.txt @@ -0,0 +1,146 @@ + +When managing a RAID1 array which uses metadata other than the +"native" metadata understood by the kernel, mdadm makes use of a +partner program named 'mdmon' to manage some aspects of updating +that metadata and synchronising the metadata with the array state. + +This document provides some details on how mdmon works. + +Containers +---------- + +As background: mdadm makes a distinction between an 'array' and a +'container'. Other sources sometimes use the term 'volume' or +'device' for an 'array', and may use the term 'array' for a +'container'. + +For our purposes: + - a 'container' is a collection of devices which are described by a + single set of metadata. The metadata may be stored equally + on all devices, or different devices may have quite different + subsets of the total metadata. But there is conceptually one set + of metadata that unifies the devices. + + - an 'array' is a set of datablocks from various devices which + together are used to present the abstraction of a single linear + sequence of blocks, which may provide data redundancy or enhanced + performance.
+ +So a container has some metadata and provides a number of arrays which +are described by that metadata. + +Sometimes this model doesn't work perfectly. For example, global +spares may have their own metadata which is quite different from the +metadata from any device that participates in one or more arrays. +Such a global spare might still need to belong to some container so +that it is available to be used should a failure arise. In that case +we consider the 'metadata' to be the union of the metadata on the +active devices which describes the arrays, and the metadata on the +global spares which only describes the spares. In this case different +devices in the one container will have quite different metadata. + + +Purpose +------- + +The main purpose of mdmon is to update the metadata in response to +changes to the array which need to be reflected in the metadata before +future writes to the array can safely be performed. +These include: + - transitions from 'clean' to 'dirty'. + - recording that devices have failed. + - recording the progress of a 'reshape' + +This requires mdmon to be running at any time that the array is +writable (a read-only array does not require mdmon to be running). + +Because mdmon must be able to process these metadata updates at any +time, it must (when running) have exclusive write access to the +metadata. Any other changes (e.g. reconfiguration of the array) must +go through mdmon. + +A secondary role for mdmon is to activate spares when a device fails. +This role is much less time-critical than the other metadata updates, +so it could be performed by a separate process, possibly +"mdadm --monitor" which has a related role of moving devices between +arrays. A main reason for including this functionality in mdmon is +that in the native-metadata case this function is handled in the +kernel, and mdmon's reason for existence is to provide functionality +which is otherwise handled by the kernel.
+ + +Design overview +--------------- + +mdmon is structured as two threads with a common address space and +common data structures. These threads are known as the 'monitor' and +the 'manager'. + +The 'monitor' has the primary role of monitoring the array for +important state changes and updating the metadata accordingly. As +writes to the array can be blocked until 'monitor' completes and +acknowledges the update, it must be very careful not to block itself. +In particular it must not block waiting for any write to complete else +it could deadlock. This means that it must not allocate memory as +doing this can require dirty memory to be written out and if the +system chooses to write to the array that mdmon is monitoring, the +memory allocation could deadlock. + +So 'monitor' must never allocate memory and must limit the number of +other system calls it performs. It may: + - use select (or poll) to wait for activity on a file descriptor + - read from a sysfs file descriptor + - write to a sysfs file descriptor + - write the metadata out to the block devices using O_DIRECT + - send a signal (kill) to the manager thread + +It must not e.g. open files or do anything similar that might allocate +resources. + +The 'manager' thread does everything else that is needed. If any +files are to be opened (e.g. because a device has been added to the +array), the manager does that. If any memory needs to be allocated +(e.g. to hold data about a new array as can happen when one set of +metadata describes several arrays), the manager performs that +allocation. + +The 'manager' is also responsible for communicating with mdadm and +assigning spares to replace failed devices. + + +Handling metadata updates +------------------------- + +There are a number of cases in which mdadm needs to update the +metadata which mdmon is managing. These include: + - creating a new array in an active container + - adding a device to a container + - reconfiguring an array +etc.
+ +To complete these updates, mdadm must send a message to mdmon which +will merge the update into the metadata as it is at that moment. + +To achieve this, mdmon creates a Unix Domain Socket which the manager +thread listens on. mdadm sends a message over this socket. The +manager thread examines the message to see if it will require +allocating any memory and allocates it. This is done in the +'prepare_update' metadata method. + +The update message is then queued for handling by the monitor thread +which it will do when convenient. The monitor thread calls +->process_update which should atomically make the required changes to +the metadata, making use of the pre-allocated memory as required. Any +memory that is no longer needed can be placed back in the request and +the manager thread will free it. + +The exact format of a metadata update is up to the implementer of the +metadata handlers. It will simply describe a change that needs to be +made. It will sometimes contain fragments of the metadata to be +copied in to place. However the ->process_update routine must make +sure not to over-write any field that the monitor thread might have +updated, such as a 'device failed' or 'array is dirty' state. + +When the monitor thread has completed the update and written it to the +devices, an acknowledgement message is sent back over the socket so +that mdadm knows it is complete. diff --git a/drive_encryption.c b/drive_encryption.c new file mode 100644 index 0000000..27da962 --- /dev/null +++ b/drive_encryption.c @@ -0,0 +1,724 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Read encryption information for Opal and ATA devices.
+ * + * Copyright (C) 2024 Intel Corporation + * Author: Blazej Kucman + */ + +#include "mdadm.h" + +#include +#include +#include +#include +#include "drive_encryption.h" + +#define DEFAULT_SECTOR_SIZE (512) + +/* + * Opal defines + * TCG Storage Opal SSC 2.01 chapter 3.3.3 + * NVM ExpressTM Revision 1.4c, chapter 5 + */ +#define TCG_SECP_01 (0x01) +#define TCG_SECP_00 (0x00) +#define OPAL_DISCOVERY_COMID (0x0001) +#define OPAL_LOCKING_FEATURE (0x0002) +#define OPAL_IO_BUFFER_LEN 2048 +#define OPAL_DISCOVERY_FEATURE_HEADER_LEN (4) + +/* + * NVMe defines + * NVM ExpressTM Revision 1.4c, chapter 5 + */ +#define NVME_SECURITY_RECV (0x82) +#define NVME_IDENTIFY (0x06) +#define NVME_IDENTIFY_RESPONSE_LEN 4096 +#define NVME_OACS_BYTE_POSITION (256) +#define NVME_IDENTIFY_CONTROLLER_DATA (1) + +/* + * ATA defines + * ATA/ATAPI Command Set ATA8-ACS + * SCSI / ATA Translation - 3 (SAT-3) + * SCSI Primary Commands - 4 (SPC-4) + * AT Attachment-8 - ATA Serial Transport (ATA8-AST) + * ATA Command Pass-Through + */ +#define ATA_IDENTIFY (0xec) +#define ATA_TRUSTED_RECEIVE (0x5c) +#define ATA_SECURITY_WORD_POSITION (128) +#define HDIO_DRIVE_CMD (0x031f) +#define ATA_TRUSTED_COMPUTING_POS (48) +#define ATA_PASS_THROUGH_12 (0xa1) +#define ATA_IDENTIFY_RESPONSE_LEN (512) +#define ATA_PIO_DATA_IN (4) +#define SG_CHECK_CONDITION (0x02) +#define ATA_STATUS_RETURN_DESCRIPTOR (0x09) +#define ATA_PT_INFORMATION_AVAILABLE_ASCQ (0x1d) +#define ATA_PT_INFORMATION_AVAILABLE_ASC (0x00) +#define ATA_INQUIRY_LENGTH (0x0c) +#define SG_INTERFACE_ID 'S' +#define SG_IO_TIMEOUT (60000) +#define SG_SENSE_SIZE (32) +#define SENSE_DATA_CURRENT_FIXED (0x70) +#define SENSE_DATA_CURRENT_DESC (0x72) +#define SENSE_CURRENT_RES_DESC_POS (8) +#define SG_DRIVER_SENSE (0x08) + +typedef enum drive_feature_support_status { + /* Drive feature is supported. */ + DRIVE_FEAT_SUP_ST = 0, + /* Drive feature is not supported. */ + DRIVE_FEAT_NOT_SUP_ST, + /* Drive feature support check failed. 
*/ + DRIVE_FEAT_CHECK_FAILED_ST +} drive_feat_sup_st; + +/* TCG Storage Opal SSC 2.01 chapter 3.1.1.3 */ +typedef struct opal_locking_feature { + /* feature header */ + __u16 feature_code; + __u8 reserved : 4; + __u8 version : 4; + __u8 description_length; + /* feature description */ + __u8 locking_supported : 1; + __u8 locking_enabled : 1; + __u8 locked : 1; + __u8 media_encryption : 1; + __u8 mbr_enabled : 1; + __u8 mbr_done : 1; + __u8 mbr_shadowing_not_supported : 1; + __u8 hw_reset_for_dor_supported : 1; + __u8 reserved1[11]; +} __attribute__((__packed__)) opal_locking_feature_t; + +/* TCG Storage Opal SSC 2.01 chapter 3.1.1.1 */ +typedef struct opal_level0_header { + __u32 length; + __u32 version; + __u64 reserved; + __u8 vendor_specific[32]; +} opal_level0_header_t; + +/** + * NVM ExpressTM Revision 1.4c, Figure 249 + * Structure specifies only OACS filed, which is needed in the current use case. + */ +typedef struct nvme_identify_ctrl { + __u8 reserved[255]; + __u16 oacs; + __u8 reserved2[3839]; +} nvme_identify_ctrl_t; + +/* SCSI Primary Commands - 4 (SPC-4), Table 512 */ +typedef struct supported_security_protocols { + __u8 reserved[6]; + __u16 list_length; + __u8 list[504]; +} supported_security_protocols_t; + +/* ATA/ATAPI Command Set - 3 (ACS-3), Table 45 */ +typedef struct ata_security_status { + __u16 security_supported : 1; + __u16 security_enabled : 1; + __u16 security_locked : 1; + __u16 security_frozen : 1; + __u16 security_count_expired : 1; + __u16 enhanced_security_erase_supported : 1; + __u16 reserved1 : 2; + __u16 security_level : 1; + __u16 reserved2 : 7; +} __attribute__((__packed__)) ata_security_status_t; + +/* ATA/ATAPI Command Set - 3 (ACS-3), Table 45 */ +typedef struct ata_trusted_computing { + __u16 tc_feature :1; + __u16 reserved : 13; + __u16 var1 : 1; + __u16 var2 : 1; +} __attribute__((__packed__)) ata_trusted_computing_t; + +mapping_t encryption_ability_map[] = { + { "None", ENC_ABILITY_NONE }, + { "Other", ENC_ABILITY_OTHER }, 
+ { "SED", ENC_ABILITY_SED }, + { NULL, UnSet } +}; + +mapping_t encryption_status_map[] = { + { "Unencrypted", ENC_STATUS_UNENCRYPTED }, + { "Locked", ENC_STATUS_LOCKED }, + { "Unlocked", ENC_STATUS_UNLOCKED }, + { NULL, UnSet } +}; + +/** + * get_encryption_ability_string() - get encryption ability name string. + * @ability: encryption ability enum. + * + * Return: encryption ability string. + */ +const char *get_encryption_ability_string(enum encryption_ability ability) +{ + return map_num_s(encryption_ability_map, ability); +} + +/** + * get_encryption_status_string() - get encryption status name string. + * @status: encryption status enum. + * + * Return: encryption status string. + */ +const char *get_encryption_status_string(enum encryption_status status) +{ + return map_num_s(encryption_status_map, status); +} + +/** + * get_opal_locking_feature_description() - get opal locking feature description. + * @response: response from Opal Discovery Level 0. + * + * Based on the documentation TCG Storage Opal SSC 2.01 chapter 3.1.1, + * a Locking feature is searched for in Opal Level 0 Discovery response. + * + * Return: if locking feature is found, pointer to struct %opal_locking_feature_t, NULL otherwise. + */ +static opal_locking_feature_t *get_opal_locking_feature_description(__u8 *response) +{ + opal_level0_header_t *response_header = (opal_level0_header_t *)response; + int features_length = __be32_to_cpu(response_header->length); + int current_position = sizeof(*response_header); + + while (current_position < features_length) { + opal_locking_feature_t *feature; + + feature = (opal_locking_feature_t *)(response + current_position); + + if (__be16_to_cpu(feature->feature_code) == OPAL_LOCKING_FEATURE) + return feature; + + current_position += feature->description_length + OPAL_DISCOVERY_FEATURE_HEADER_LEN; + } + + return NULL; +} + +/** + * nvme_security_recv_ioctl() - nvme security receive ioctl. + * @disk_fd: a disk file descriptor.
+ * @sec_protocol: security protocol. + * @comm_id: command id. + * @response_buffer: response buffer to fill out. + * @buf_size: response buffer size. + * @verbose: verbose flag. + * + * Based on the documentations TCG Storage Opal SSC 2.01 chapter 3.3.3 and + * NVM ExpressTM Revision 1.4c, chapter 5.25, + * read security receive command via ioctl(). + * On success, @response_buffer is completed. + * + * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise. + */ +static mdadm_status_t +nvme_security_recv_ioctl(int disk_fd, __u8 sec_protocol, __u16 comm_id, void *response_buffer, + size_t buf_size, const int verbose) +{ + struct nvme_admin_cmd nvme_cmd = {0}; + int status; + + nvme_cmd.opcode = NVME_SECURITY_RECV; + nvme_cmd.cdw10 = sec_protocol << 24 | comm_id << 8; + nvme_cmd.cdw11 = buf_size; + nvme_cmd.data_len = buf_size; + nvme_cmd.addr = (__u64)response_buffer; + + status = ioctl(disk_fd, NVME_IOCTL_ADMIN_CMD, &nvme_cmd); + if (status != 0) { + pr_vrb("Failed to read NVMe security receive ioctl() for device /dev/%s, status: %d\n", + fd2kname(disk_fd), status); + return MDADM_STATUS_ERROR; + } + + return MDADM_STATUS_SUCCESS; +} + +/** + * nvme_identify_ioctl() - NVMe identify ioctl. + * @disk_fd: a disk file descriptor. + * @response_buffer: response buffer to fill out. + * @buf_size: response buffer size. + * @verbose: verbose flag. + * + * Based on the documentations TCG Storage Opal SSC 2.01 chapter 3.3.3 and + * NVM ExpressTM Revision 1.4c, chapter 5.25, + * read NVMe identify via ioctl(). + * On success, @response_buffer will be completed. + * + * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise. 
+ */ +static mdadm_status_t +nvme_identify_ioctl(int disk_fd, void *response_buffer, size_t buf_size, const int verbose) +{ + struct nvme_admin_cmd nvme_cmd = {0}; + int status; + + nvme_cmd.opcode = NVME_IDENTIFY; + nvme_cmd.cdw10 = NVME_IDENTIFY_CONTROLLER_DATA; + nvme_cmd.data_len = buf_size; + nvme_cmd.addr = (__u64)response_buffer; + + status = ioctl(disk_fd, NVME_IOCTL_ADMIN_CMD, &nvme_cmd); + if (status != 0) { + pr_vrb("Failed to read NVMe identify ioctl() for device /dev/%s, status: %d\n", + fd2kname(disk_fd), status); + return MDADM_STATUS_ERROR; + } + + return MDADM_STATUS_SUCCESS; +} + +/** + * is_sec_prot_01h_supported() - check if security protocol 01h supported. + * @security_protocols: struct with response from disk (NVMe, SATA) describing supported + * security protocols. + * + * Return: true if TCG_SECP_01 found, false otherwise. + */ +static bool is_sec_prot_01h_supported(supported_security_protocols_t *security_protocols) +{ + int list_length = be16toh(security_protocols->list_length); + int index; + + for (index = 0 ; index < list_length; index++) { + if (security_protocols->list[index] == TCG_SECP_01) + return true; + } + + return false; +} + +/** + * is_sec_prot_01h_supported_nvme() - check if security protocol 01h supported for given NVMe disk. + * @disk_fd: a disk file descriptor. + * @verbose: verbose flag. + * + * Return: %DRIVE_FEAT_SUP_ST if TCG_SECP_01 supported, %DRIVE_FEAT_NOT_SUP_ST if not supported, + * %DRIVE_FEAT_CHECK_FAILED_ST if failed to check. 
+ */ +static drive_feat_sup_st is_sec_prot_01h_supported_nvme(int disk_fd, const int verbose) +{ + supported_security_protocols_t security_protocols = {0}; + + /* security_protocol: TCG_SECP_00, comm_id: not applicable */ + if (nvme_security_recv_ioctl(disk_fd, TCG_SECP_00, 0x0, &security_protocols, + sizeof(security_protocols), verbose)) + return DRIVE_FEAT_CHECK_FAILED_ST; + + if (is_sec_prot_01h_supported(&security_protocols)) + return DRIVE_FEAT_SUP_ST; + + return DRIVE_FEAT_NOT_SUP_ST; +} + +/** + * is_nvme_sec_send_recv_supported() - check if Security Send and Security Receive is supported. + * @disk_fd: a disk file descriptor. + * @verbose: verbose flag. + * + * Check if "Optional Admin Command Support" bit 0 is set in NVMe identify. + * Bit 0 set to 1 means controller supports the Security Send and Security Receive commands. + * + * Return: %DRIVE_FEAT_SUP_ST if security send/receive supported, + * %DRIVE_FEAT_NOT_SUP_ST if not supported, %DRIVE_FEAT_CHECK_FAILED_ST if check failed. + */ +static drive_feat_sup_st is_nvme_sec_send_recv_supported(int disk_fd, const int verbose) +{ + nvme_identify_ctrl_t nvme_identify = {0}; + int status = 0; + + status = nvme_identify_ioctl(disk_fd, &nvme_identify, sizeof(nvme_identify), verbose); + if (status) + return DRIVE_FEAT_CHECK_FAILED_ST; + + if ((__le16_to_cpu(nvme_identify.oacs) & 0x1) == 0x1) + return DRIVE_FEAT_SUP_ST; + + return DRIVE_FEAT_NOT_SUP_ST; +} + +/** + * get_opal_encryption_information() - get Opal encryption information. + * @buffer: buffer with Opal Level 0 Discovery response. + * @information: struct to fill out, describing encryption status of disk. + * + * If Locking feature frame is in response from Opal Level 0 discovery, &encryption_information_t + * structure is completed with status and ability otherwise the status is set to &None. + * For possible encryption statuses and abilities, + * please refer to enums &encryption_status and &encryption_ability. 
+ * + * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise. + */ +static mdadm_status_t get_opal_encryption_information(__u8 *buffer, + encryption_information_t *information) +{ + opal_locking_feature_t *opal_locking_feature = + get_opal_locking_feature_description(buffer); + + if (!opal_locking_feature) + return MDADM_STATUS_ERROR; + + if (opal_locking_feature->locking_supported == 1) { + information->ability = ENC_ABILITY_SED; + + if (opal_locking_feature->locking_enabled == 0) + information->status = ENC_STATUS_UNENCRYPTED; + else if (opal_locking_feature->locked == 1) + information->status = ENC_STATUS_LOCKED; + else + information->status = ENC_STATUS_UNLOCKED; + } else { + information->ability = ENC_ABILITY_NONE; + information->status = ENC_STATUS_UNENCRYPTED; + } + + return MDADM_STATUS_SUCCESS; +} + +/** + * get_nvme_opal_encryption_information() - get NVMe Opal encryption information. + * @disk_fd: a disk file descriptor. + * @information: struct to fill out, describing encryption status of disk. + * @verbose: verbose flag. + * + * In case the disk supports Opal Level 0 discovery, &encryption_information_t structure + * is completed with status and ability based on ioctl response, + * otherwise the ability is set to %ENC_ABILITY_NONE and &status to %ENC_STATUS_UNENCRYPTED. + * As the current use case does not need the knowledge of Opal support, if there is no support, + * %MDADM_STATUS_SUCCESS will be returned, with the values described above. + * For possible encryption statuses and abilities, + * please refer to enums &encryption_status and &encryption_ability. + * + * %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR otherwise. 
+ */
+mdadm_status_t
+get_nvme_opal_encryption_information(int disk_fd, encryption_information_t *information,
+				     const int verbose)
+{
+	/* Zeroed so that a short device response is never parsed together with stack garbage. */
+	__u8 buffer[OPAL_IO_BUFFER_LEN] = {0};
+	drive_feat_sup_st sec_send_recv_supported;
+	drive_feat_sup_st protocol_01h_supported;
+	mdadm_status_t status;
+
+	/* Defaults reported whenever Opal turns out not to be supported. */
+	information->ability = ENC_ABILITY_NONE;
+	information->status = ENC_STATUS_UNENCRYPTED;
+
+	sec_send_recv_supported = is_nvme_sec_send_recv_supported(disk_fd, verbose);
+	if (sec_send_recv_supported == DRIVE_FEAT_CHECK_FAILED_ST)
+		return MDADM_STATUS_ERROR;
+
+	/* Opal not supported */
+	if (sec_send_recv_supported == DRIVE_FEAT_NOT_SUP_ST)
+		return MDADM_STATUS_SUCCESS;
+
+	/*
+	 * Security Send/Receive is supported from here on, so it should be possible
+	 * to read the supported security protocols.
+	 */
+	protocol_01h_supported = is_sec_prot_01h_supported_nvme(disk_fd, verbose);
+	if (protocol_01h_supported == DRIVE_FEAT_CHECK_FAILED_ST)
+		return MDADM_STATUS_ERROR;
+
+	/* Opal not supported */
+	if (protocol_01h_supported == DRIVE_FEAT_NOT_SUP_ST)
+		return MDADM_STATUS_SUCCESS;
+
+	/* Opal Level 0 discovery: security protocol TCG_SECP_01, OPAL_DISCOVERY_COMID. */
+	if (nvme_security_recv_ioctl(disk_fd, TCG_SECP_01, OPAL_DISCOVERY_COMID, buffer,
+				     sizeof(buffer), verbose))
+		return MDADM_STATUS_ERROR;
+
+	status = get_opal_encryption_information(buffer, information);
+	if (status)
+		pr_vrb("Locking feature description not found in Level 0 discovery response. Device /dev/%s.\n",
+		       fd2kname(disk_fd));
+
+	/* Sanity check: a drive without encryption ability must be reported as unencrypted. */
+	if (information->ability == ENC_ABILITY_NONE)
+		assert(information->status == ENC_STATUS_UNENCRYPTED);
+
+	return status;
+}
+
+/**
+ * ata_pass_through12_ioctl() - ata pass through12 ioctl.
+ * @disk_fd: a disk file descriptor.
+ * @ata_command: ata command.
+ * @sec_protocol: security protocol.
+ * @comm_id: additional command id.
+ * @response_buffer: response buffer to fill out.
+ * @buf_size: response buffer size.
+ * @verbose: verbose flag.
+ *
+ * Based on the documentations ATA Command Pass-Through, chapter 13.2.2 and
+ * ATA Translation - 3 (SAT-3), send read ata pass through 12 command via ioctl().
+ * On success, @response_buffer will be completed.
+ *
+ * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR on fail.
+ */
+static mdadm_status_t
+ata_pass_through12_ioctl(int disk_fd, __u8 ata_command, __u8 sec_protocol, __u16 comm_id,
+			 void *response_buffer, size_t buf_size, const int verbose)
+{
+	__u8 cdb[ATA_INQUIRY_LENGTH] = {0};
+	__u8 sense[SG_SENSE_SIZE] = {0};
+	__u8 *sense_desc = NULL;
+	sg_io_hdr_t sg = {0};
+
+	/*
+	 * Build the command descriptor block. References:
+	 * ATA Command Pass-Through, chapter 13.2.2
+	 * SCSI Primary Commands - 4 (SPC-4)
+	 * ATA Translation - 3 (SAT-3)
+	 *
+	 * Bytes of cdb[] that are not assigned below keep the zero from the initializer.
+	 * cdb[] is sized by ATA_INQUIRY_LENGTH; the same constant is compared further down
+	 * with the second byte of the sense descriptor.
+	 */
+	cdb[0] = ATA_PASS_THROUGH_12;
+	/* protocol, bits 1-4 */
+	cdb[1] = ATA_PIO_DATA_IN << 1;
+	/* Bytes: CK_COND=1, T_DIR = 1, BYTE_BLOCK = 1, Length in Sector Count = 2 */
+	cdb[2] = 0x2E;
+	cdb[3] = sec_protocol;
+	/*
+	 * Sector count: @buf_size expressed in DEFAULT_SECTOR_SIZE units.
+	 * NOTE(review): cdb[] holds __u8, so the quotient is truncated to 8 bits and any
+	 * remainder of the division is dropped - presumably callers always pass a whole,
+	 * small number of sectors; confirm against the callers.
+	 */
+	cdb[4] = buf_size / DEFAULT_SECTOR_SIZE;
+	cdb[6] = (comm_id) & 0xFF; /* comm_id, low byte */
+	cdb[7] = (comm_id >> 8) & 0xFF; /* comm_id, high byte */
+	cdb[9] = ata_command;
+
+	sg.interface_id = SG_INTERFACE_ID;
+	sg.cmd_len = sizeof(cdb);
+	sg.mx_sb_len = sizeof(sense);
+	sg.dxfer_direction = SG_DXFER_FROM_DEV;
+	sg.dxfer_len = buf_size;
+	sg.dxferp = response_buffer; /* data read from the device lands in the caller's buffer */
+	sg.cmdp = cdb;
+	sg.sbp = sense;
+	sg.timeout = SG_IO_TIMEOUT;
+	sg.usr_ptr = NULL;
+
+	if (ioctl(disk_fd, SG_IO, &sg) < 0) {
+		pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s.\n", fd2kname(disk_fd));
+		return MDADM_STATUS_ERROR;
+	}
+
+	if ((sg.status && sg.status != SG_CHECK_CONDITION) || sg.host_status ||
+	    (sg.driver_status && sg.driver_status != SG_DRIVER_SENSE)) {
+		pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s.\n", fd2kname(disk_fd));
+		pr_vrb("SG_IO error: ATA_12 Status: %d Host Status: %d, Driver Status: %d\n",
+		       sg.status, sg.host_status, sg.driver_status);
+		return MDADM_STATUS_ERROR;
+	}
+
+	/*
+	 * verify expected sense response code:
+	 * sense[0] must be SENSE_DATA_CURRENT_DESC or SENSE_DATA_CURRENT_FIXED,
+	 * anything else (including an all-zero sense buffer) is treated as a failure.
+	 */
+	if (!(sense[0] == SENSE_DATA_CURRENT_DESC || sense[0] == SENSE_DATA_CURRENT_FIXED)) {
+		pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s.\n", fd2kname(disk_fd));
+		return MDADM_STATUS_ERROR;
+	}
+
+	sense_desc = sense + SENSE_CURRENT_RES_DESC_POS;
+	/*
+	 * verify sense data current response with descriptor format:
+	 * the descriptor at SENSE_CURRENT_RES_DESC_POS must start with
+	 * ATA_STATUS_RETURN_DESCRIPTOR and its second byte (presumably the descriptor
+	 * length - confirm against SAT-3) must equal ATA_INQUIRY_LENGTH.
+	 * On mismatch sense[2] and sense[3] are logged as ASC and ASCQ.
+	 */
+	if (sense[0] == SENSE_DATA_CURRENT_DESC &&
+	    !(sense_desc[0] == ATA_STATUS_RETURN_DESCRIPTOR &&
+	    sense_desc[1] == ATA_INQUIRY_LENGTH)) {
+		pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s. Sense data ASC: %d, ASCQ: %d.\n",
+		       fd2kname(disk_fd), sense[2], sense[3]);
+		return MDADM_STATUS_ERROR;
+	}
+
+	/*
+	 * verify sense data current response with fixed format:
+	 * sense[12] and sense[13] must carry ATA_PT_INFORMATION_AVAILABLE_ASC and
+	 * ATA_PT_INFORMATION_AVAILABLE_ASCQ; on mismatch both bytes are logged.
+	 */
+	if (sense[0] == SENSE_DATA_CURRENT_FIXED &&
+	    !(sense[12] == ATA_PT_INFORMATION_AVAILABLE_ASC &&
+	    sense[13] == ATA_PT_INFORMATION_AVAILABLE_ASCQ)) {
+		pr_vrb("Failed ata passthrough12 ioctl. Device: /dev/%s. Sense data ASC: %d, ASCQ: %d.\n",
+		       fd2kname(disk_fd), sense[12], sense[13]);
+		return MDADM_STATUS_ERROR;
+	}
+
+	return MDADM_STATUS_SUCCESS;
+}
+
+/**
+ * is_sec_prot_01h_supported_ata() - check if security protocol 01h supported for given SATA disk.
+ * @disk_fd: a disk file descriptor.
+ * @verbose: verbose flag.
+ *
+ * Return: %DRIVE_FEAT_SUP_ST if TCG_SECP_01 supported, %DRIVE_FEAT_NOT_SUP_ST if not supported,
+ * %DRIVE_FEAT_CHECK_FAILED_ST if failed.
+ */ +static drive_feat_sup_st is_sec_prot_01h_supported_ata(int disk_fd, const int verbose) +{ + supported_security_protocols_t security_protocols; + + mdadm_status_t result = ata_pass_through12_ioctl(disk_fd, ATA_TRUSTED_RECEIVE, TCG_SECP_00, + 0x0, &security_protocols, + sizeof(security_protocols), verbose); + if (result) + return DRIVE_FEAT_CHECK_FAILED_ST; + + if (is_sec_prot_01h_supported(&security_protocols)) + return DRIVE_FEAT_SUP_ST; + + return DRIVE_FEAT_NOT_SUP_ST; +} + +/** + * is_ata_trusted_computing_supported() - check if ata trusted computing supported. + * @buffer: buffer with ATA identify response, not NULL. + * + * Return: true if trusted computing bit set, false otherwise. + */ +bool is_ata_trusted_computing_supported(__u16 *buffer) +{ + /* Added due to warnings from the compiler about a possible uninitialized variable below. */ + assert(buffer); + + __u16 security_tc_frame = __le16_to_cpu(buffer[ATA_TRUSTED_COMPUTING_POS]); + ata_trusted_computing_t *security_tc = (ata_trusted_computing_t *)&security_tc_frame; + + if (security_tc->tc_feature == 1) + return true; + + return false; +} + +/** + * get_ata_standard_security_status() - get ATA disk encryption information from ATA identify. + * @buffer: buffer with response from ATA identify, not NULL. + * @information: struct to fill out, describing encryption status of disk. + * + * The function based on the Security status frame from ATA identify, + * completed encryption information. + * For possible encryption statuses and abilities, + * please refer to enums &encryption_status and &encryption_ability. + * + * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR on fail. + */ +static mdadm_status_t get_ata_standard_security_status(__u16 *buffer, + struct encryption_information *information) +{ + /* Added due to warnings from the compiler about a possible uninitialized variable below. 
*/ + assert(buffer); + + __u16 security_status_frame = __le16_to_cpu(buffer[ATA_SECURITY_WORD_POSITION]); + ata_security_status_t *security_status = (ata_security_status_t *)&security_status_frame; + + if (!security_status->security_supported) { + information->ability = ENC_ABILITY_NONE; + information->status = ENC_STATUS_UNENCRYPTED; + + return MDADM_STATUS_SUCCESS; + } + + information->ability = ENC_ABILITY_OTHER; + + if (security_status->security_enabled == 0) + information->status = ENC_STATUS_UNENCRYPTED; + else if (security_status->security_locked == 1) + information->status = ENC_STATUS_LOCKED; + else + information->status = ENC_STATUS_UNLOCKED; + + return MDADM_STATUS_SUCCESS; +} + +/** + * is_ata_opal() - check if SATA disk support Opal. + * @disk_fd: a disk file descriptor. + * @buffer: buffer with ATA identify response. + * @verbose: verbose flag. + * + * Return: %DRIVE_FEAT_SUP_ST if TCG_SECP_01 supported, %DRIVE_FEAT_NOT_SUP_ST if not supported, + * %DRIVE_FEAT_CHECK_FAILED_ST if failed to check. + */ +static drive_feat_sup_st is_ata_opal(int disk_fd, __u16 *buffer_identify, const int verbose) +{ + bool tc_status = is_ata_trusted_computing_supported(buffer_identify); + drive_feat_sup_st tcg_sec_prot_status; + + if (!tc_status) + return DRIVE_FEAT_NOT_SUP_ST; + + tcg_sec_prot_status = is_sec_prot_01h_supported_ata(disk_fd, verbose); + + if (tcg_sec_prot_status == DRIVE_FEAT_CHECK_FAILED_ST) { + pr_vrb("Failed to verify if security protocol 01h supported. Device /dev/%s.\n", + fd2kname(disk_fd)); + return DRIVE_FEAT_CHECK_FAILED_ST; + } + + if (tc_status && tcg_sec_prot_status == DRIVE_FEAT_SUP_ST) + return DRIVE_FEAT_SUP_ST; + + return DRIVE_FEAT_NOT_SUP_ST; +} + +/** + * get_ata_encryption_information() - get ATA disk encryption information. + * @disk_fd: a disk file descriptor. + * @information: struct to fill out, describing encryption status of disk. + * @verbose: verbose flag. 
+ * + * The function reads information about encryption, if the disk supports Opal, + * the information is completed based on Opal Level 0 discovery, otherwise, + * based on ATA security status frame from ATA identification response. + * For possible encryption statuses and abilities, + * please refer to enums &encryption_status and &encryption_ability. + * + * Based on the documentations ATA/ATAPI Command Set ATA8-ACS and + * AT Attachment-8 - ATA Serial Transport (ATA8-AST). + * + * Return: %MDADM_STATUS_SUCCESS on success, %MDADM_STATUS_ERROR on fail. + */ +mdadm_status_t +get_ata_encryption_information(int disk_fd, struct encryption_information *information, + const int verbose) +{ + __u8 buffer_opal_level0_discovery[OPAL_IO_BUFFER_LEN] = {0}; + __u16 buffer_identify[ATA_IDENTIFY_RESPONSE_LEN] = {0}; + drive_feat_sup_st ata_opal_status; + mdadm_status_t status; + + /* Get disk ATA identification */ + status = ata_pass_through12_ioctl(disk_fd, ATA_IDENTIFY, 0x0, 0x0, buffer_identify, + sizeof(buffer_identify), verbose); + if (status == MDADM_STATUS_ERROR) + return MDADM_STATUS_ERROR; + + /* Possible OPAL support, further checks require tpm_enabled.*/ + if (is_ata_trusted_computing_supported(buffer_identify)) { + /* OPAL SATA encryption checking disabled. */ + if (conf_get_sata_opal_encryption_no_verify()) + return MDADM_STATUS_SUCCESS; + + if (!sysfs_is_libata_allow_tpm_enabled(verbose)) { + pr_vrb("Detected SATA drive /dev/%s with Trusted Computing support.\n", + fd2kname(disk_fd)); + pr_vrb("Cannot verify encryption state. 
Requires libata.tpm_enabled=1.\n"); + return MDADM_STATUS_ERROR; + } + } + + ata_opal_status = is_ata_opal(disk_fd, buffer_identify, verbose); + if (ata_opal_status == DRIVE_FEAT_CHECK_FAILED_ST) + return MDADM_STATUS_ERROR; + + if (ata_opal_status == DRIVE_FEAT_NOT_SUP_ST) + return get_ata_standard_security_status(buffer_identify, information); + + /* SATA Opal */ + status = ata_pass_through12_ioctl(disk_fd, ATA_TRUSTED_RECEIVE, TCG_SECP_01, + OPAL_DISCOVERY_COMID, buffer_opal_level0_discovery, + OPAL_IO_BUFFER_LEN, verbose); + if (status != MDADM_STATUS_SUCCESS) + return MDADM_STATUS_ERROR; + + return get_opal_encryption_information(buffer_opal_level0_discovery, information); +} diff --git a/drive_encryption.h b/drive_encryption.h new file mode 100644 index 0000000..0cb8ff1 --- /dev/null +++ b/drive_encryption.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Read encryption information for Opal and ATA devices. + * + * Copyright (C) 2024 Intel Corporation + * Author: Blazej Kucman + */ + +typedef enum encryption_status { + /* The drive is not currently encrypted. */ + ENC_STATUS_UNENCRYPTED = 0, + /* The drive is encrypted and the data is not accessible. */ + ENC_STATUS_LOCKED, + /* The drive is encrypted but the data is accessible in unencrypted form. 
*/ + ENC_STATUS_UNLOCKED +} encryption_status_t; + +typedef enum encryption_ability { + ENC_ABILITY_NONE = 0, + ENC_ABILITY_OTHER, + /* Self encrypted drive */ + ENC_ABILITY_SED +} encryption_ability_t; + +typedef struct encryption_information { + encryption_ability_t ability; + encryption_status_t status; +} encryption_information_t; + +mdadm_status_t +get_nvme_opal_encryption_information(int disk_fd, struct encryption_information *information, + const int verbose); +mdadm_status_t +get_ata_encryption_information(int disk_fd, struct encryption_information *information, + const int verbose); +const char *get_encryption_ability_string(enum encryption_ability ability); +const char *get_encryption_status_string(enum encryption_status status); diff --git a/external-reshape-design.txt b/external-reshape-design.txt deleted file mode 100644 index e4cf4e1..0000000 --- a/external-reshape-design.txt +++ /dev/null @@ -1,280 +0,0 @@ -External Reshape - -1 Problem statement - -External (third-party metadata) reshape differs from native-metadata -reshape in three key ways: - -1.1 Format specific constraints - -In the native case reshape is limited by what is implemented in the -generic reshape routine (Grow_reshape()) and what is supported by the -kernel. There are exceptional cases where Grow_reshape() may block -operations when it knows that the kernel implementation is broken, but -otherwise the kernel is relied upon to be the final arbiter of what -reshape operations are supported. - -In the external case the kernel, and the generic checks in -Grow_reshape(), become the super-set of what reshapes are possible. The -metadata format may not support, or have yet to implement a given -reshape type. The implication for Grow_reshape() is that it must query -the metadata handler and effect changes in the metadata before the new -geometry is posted to the kernel. The ->reshape_super method allows -Grow_reshape() to validate the requested operation and post the metadata -update. 
- -1.2 Scope of reshape - -Native metadata reshape is always performed at the array scope (no -metadata relationship with sibling arrays on the same disks). External -reshape, depending on the format, may not allow the number of member -disks to be changed in a subarray unless the change is simultaneously -applied to all subarrays in the container. For example the imsm format -requires all member disks to be a member of all subarrays, so a 4-disk -raid5 in a container that also houses a 4-disk raid10 array could not be -reshaped to 5 disks as the imsm format does not support a 5-disk raid10 -representation. This requires the ->reshape_super method to check the -contents of the array and ask the user to run the reshape at container -scope (if all subarrays are agreeable to the change), or report an -error in the case where one subarray cannot support the change. - -1.3 Monitoring / checkpointing - -Reshape, unlike rebuild/resync, requires strict checkpointing to survive -interrupted reshape operations. For example when expanding a raid5 -array the first few stripes of the array will be overwritten in a -destructive manner. When restarting the reshape process we need to know -the exact location of the last successfully written stripe, and we need -to restore the data in any partially overwritten stripe. Native -metadata stores this backup data in the unused portion of spares that -are being promoted to array members, or in an external backup file -(located on a non-involved block device). - -The kernel is in charge of recording checkpoints of reshape progress, -but mdadm is delegated the task of managing the backup space which -involves: -1/ Identifying what data will be overwritten in the next unit of reshape - operation -2/ Suspending access to that region so that a snapshot of the data can - be transferred to the backup space. -3/ Allowing the kernel to reshape the saved region and setting the - boundary for the next backup. 
- -In the external reshape case we want to preserve this mdadm -'reshape-manager' arrangement, but have a third actor, mdmon, to -consider. It is tempting to give the role of managing reshape to mdmon, -but that is counter to its role as a monitor, and conflicts with the -existing capabilities and role of mdadm to manage the progress of -reshape. For clarity the external reshape implementation maintains the -role of mdmon as a (mostly) passive recorder of raid events, and mdadm -treats it as it would the kernel in the native reshape case (modulo -needing to send explicit metadata update messages and checking that -mdmon took the expected action). - -External reshape can use the generic md backup file as a fallback, but in the -optimal/firmware-compatible case the reshape-manager will use the metadata -specific areas for managing reshape. The implementation also needs to spawn a -reshape-manager per subarray when the reshape is being carried out at the -container level. For these two reasons the ->manage_reshape() method is -introduced. This method in addition to base tasks mentioned above: -1/ Processed each subarray one at a time in series - where appropriate. -2/ Uses either generic routines in Grow.c for md-style backup file - support, or uses the metadata-format specific location for storing - recovery data. -This aims to avoid a "midlayer mistake"[1] and lets the metadata handler -optionally take advantage of generic infrastructure in Grow.c - -2 Details for specific reshape requests - -There are quite a few moving pieces spread out across md, mdadm, and mdmon for -the support of external reshape, and there are several different types of -reshape that need to be comprehended by the implementation. A rundown of -these details follows. - -2.0 General provisions: - -Obtain an exclusive open on the container to make sure we are not -running concurrently with a Create() event. 
- -2.1 Freezing sync_action - - Before making any attempt at a reshape we 'freeze' every array in - the container to ensure no spare assignment or recovery happens. - This involves writing 'frozen' to sync_action and changing the '/' - after 'external:' in metadata_version to a '-'. mdmon knows that - this means not to perform any management. - - Before doing this we check that all sync_actions are 'idle', which - is racy but still useful. - Afterwards we check that all member arrays have no spares - or partial spares (recovery_start != 'none') which would indicate a - race. If they do, we unfreeze again. - - Once this completes we know all the arrays are stable. They may - still have failed devices as devices can fail at any time. However - we treat those like failures that happen during the reshape. - -2.2 Reshape size - - 1/ mdadm::Grow_reshape(): checks if mdmon is running and optionally - initializes st->update_tail - 2/ mdadm::Grow_reshape() calls ->reshape_super() to check that the size change - is allowed (being performed at subarray scope / enough room) prepares a - metadata update - 3/ mdadm::Grow_reshape(): flushes the metadata update (via - flush_metadata_update(), or ->sync_metadata()) - 4/ mdadm::Grow_reshape(): post the new size to the kernel - - -2.3 Reshape level (simple-takeover) - -"simple-takeover" implies the level change can be satisfied without touching -sync_action - - 1/ mdadm::Grow_reshape(): checks if mdmon is running and optionally - initializes st->update_tail - 2/ mdadm::Grow_reshape() calls ->reshape_super() to check that the level change - is allowed (being performed at subarray scope) prepares a - metadata update - 2a/ raid10 --> raid0: degrade all mirror legs prior to calling - ->reshape_super - 3/ mdadm::Grow_reshape(): flushes the metadata update (via - flush_metadata_update(), or ->sync_metadata()) - 4/ mdadm::Grow_reshape(): post the new level to the kernel - -2.4 Reshape chunk, layout - -2.5 Reshape raid disks (grow) - - 1/ 
mdadm::Grow_reshape(): unconditionally initializes st->update_tail - because only redundant raid levels can modify the number of raid disks - 2/ mdadm::Grow_reshape(): calls ->reshape_super() to check that the level - change is allowed (being performed at proper scope / permissible - geometry / proper spares available in the container), chooses - the spares to use, and prepares a metadata update. - 3/ mdadm::Grow_reshape(): Converts each subarray in the container to the - raid level that can perform the reshape and starts mdmon. - 4/ mdadm::Grow_reshape(): Pushes the update to mdmon. - 5/ mdadm::Grow_reshape(): uses container_content to find details of - the spares and passes them to the kernel. - 6/ mdadm::Grow_reshape(): gives raid_disks update to the kernel, - sets sync_max, sync_min, suspend_lo, suspend_hi all to zero, - and starts the reshape by writing 'reshape' to sync_action. - 7/ mdmon::monitor notices the sync_action change and tells - managemon to check for new devices. managemon notices the new - devices, opens relevant sysfs file, and passes them all to - monitor. - 8/ mdadm::Grow_reshape() calls ->manage_reshape to oversee the - rest of the reshape. - - 9/ mdadm::->manage_reshape(): saves data that will be overwritten by - the kernel to either the backup file or the metadata specific location, - advances sync_max, waits for reshape, ping mdmon, repeat. - Meanwhile mdmon::read_and_act(): records checkpoints. - Specifically. - - 9a/ if the 'next' stripe to be reshaped will over-write - itself during reshape then: - 9a.1/ increase suspend_hi to cover a suitable number of - stripes. - 9a.2/ backup those stripes safely. 
- 9a.3/ advance sync_max to allow those stripes to be backed up - 9a.4/ when sync_completed indicates that those stripes have - been reshaped, manage_reshape must ping_manager - 9a.5/ when mdmon notices that sync_completed has been updated, - it records the new checkpoint in the metadata - 9a.6/ after the ping_manager, manage_reshape will increase - suspend_lo to allow access to those stripes again - - 9b/ if the 'next' stripe to be reshaped will over-write unused - space during reshape then we apply same process as above, - except that there is no need to back anything up. - Note that we *do* need to keep suspend_hi progressing as - it is not safe to write to the area-under-reshape. For - kernel-managed-metadata this protection is provided by - ->reshape_safe, but that does not protect us in the case - of user-space-managed-metadata. - - 10/ mdadm::->manage_reshape(): Once reshape completes changes the raid - level back to the nominal raid level (if necessary) - - FIXME: native metadata does not have the capability to record the original - raid level in reshape-restart case because the kernel always records current - raid level to the metadata, whereas external metadata can masquerade at an - alternate level based on the reshape state. - -2.6 Reshape raid disks (shrink) - -3 Interaction with metadata handle. - - The following calls are made into the metadata handler to assist - with initiating and monitoring a 'reshape'. - - 1/ ->reshape_super is called quite early (after only minimial - checks) to make sure that the metadata can record the new shape - and any necessary transitions. It may be passed a 'container' - or an individual array within a container, and it should notice - the difference and act accordingly. - When a reshape is requested against a container it is expected - that it should be applied to every array in the container, - however it is up to the metadata handler to determine final - policy. 
- - If the reshape is supportable, the internal copy of the metadata - should be updated, and a metadata update suitable for sending - to mdmon should be queued. - - If the reshape will involve converting spares into array members, - this must be recorded in the metadata too. - - 2/ ->container_content will be called to find out the new state - of all the array, or all arrays in the container. Any newly - added devices (with state==0 and raid_disk >= 0) will be added - to the array as spares with the relevant slot number. - - It is likely that the info returned by ->container_content will - have ->reshape_active set, ->reshape_progress set to e.g. 0, and - new_* set appropriately. mdadm will use this information to - cause the correct reshape to start at an appropriate time. - - 3/ ->set_array_state will be called by mdmon when reshape has - started and again periodically as it progresses. This should - record the ->last_checkpoint as the point where reshape has - progressed to. When the reshape finished this will be called - again and it should notice that ->curr_action is no longer - 'reshape' and so should record that the reshape has finished - providing 'last_checkpoint' has progressed suitably. - - 4/ ->manage_reshape will be called once the reshape has been set - up in the kernel but before sync_max has been moved from 0, so - no actual reshape will have happened. - - ->manage_reshape should call progress_reshape() to allow the - reshape to progress, and should back-up any data as indicated - by the return value. See the documentation of that function - for more details. - ->manage_reshape will be called multiple times when a - container is being reshaped, once for each member array in - the container. - - - The progress of the metadata is as follows: - 1/ mdadm sends a metadata update to mdmon which marks the array - as undergoing a reshape. 
This is set up by - ->reshape_super and applied by ->process_update - For container-wide reshape, this happens once for the whole - container. - 2/ mdmon notices progress via the sysfs files and calls - ->set_array_state to update the state periodically - For container-wide reshape, this happens repeatedly for - one array, then repeatedly for the next, etc. - 3/ mdmon notices when reshape has finished and call - ->set_array_state to record the the reshape is complete. - For container-wide reshape, this happens once for each - member array. - - - -... - -[1]: Linux kernel design patterns - part 3, Neil Brown https://lwn.net/Articles/336262/ diff --git a/inventory b/inventory deleted file mode 100755 index c4801b4..0000000 --- a/inventory +++ /dev/null @@ -1,284 +0,0 @@ - -.gitignore -ANNOUNCE-3.0 -ANNOUNCE-3.0.1 -ANNOUNCE-3.0.2 -ANNOUNCE-3.0.3 -ANNOUNCE-3.1 -ANNOUNCE-3.1.1 -ANNOUNCE-3.1.2 -ANNOUNCE-3.1.3 -ANNOUNCE-3.1.4 -ANNOUNCE-3.1.5 -ANNOUNCE-3.2 -ANNOUNCE-3.2.1 -ANNOUNCE-3.2.2 -ANNOUNCE-3.2.3 -ANNOUNCE-3.2.4 -ANNOUNCE-3.2.5 -ANNOUNCE-3.2.6 -ANNOUNCE-3.3 -ANNOUNCE-3.3.1 -ANNOUNCE-3.3.2 -ANNOUNCE-3.3.3 -ANNOUNCE-3.3.4 -ANNOUNCE-3.4 -ANNOUNCE-4.0 -ANNOUNCE-4.1 -ANNOUNCE-4.2 -Assemble.c -Build.c -COPYING -ChangeLog -Create.c -Detail.c -Dump.c -Examine.c -Grow.c -INSTALL -Incremental.c -Kill.c -Makefile -Manage.c -Monitor.c -Query.c -README.initramfs -ReadMe.c -TODO -bitmap.c -bitmap.h -clustermd_tests/ -clustermd_tests/00r10_Create -clustermd_tests/00r1_Create -clustermd_tests/01r10_Grow_bitmap-switch -clustermd_tests/01r10_Grow_resize -clustermd_tests/01r1_Grow_add -clustermd_tests/01r1_Grow_bitmap-switch -clustermd_tests/01r1_Grow_resize -clustermd_tests/02r10_Manage_add -clustermd_tests/02r10_Manage_add-spare -clustermd_tests/02r10_Manage_re-add -clustermd_tests/02r1_Manage_add -clustermd_tests/02r1_Manage_add-spare -clustermd_tests/02r1_Manage_re-add -clustermd_tests/03r10_switch-recovery -clustermd_tests/03r10_switch-resync -clustermd_tests/03r1_switch-recovery 
-clustermd_tests/03r1_switch-resync -clustermd_tests/cluster_conf -clustermd_tests/func.sh -config.c -coverity-gcc-hack.h -crc32.c -crc32.h -crc32c.c -dlink.c -dlink.h -external-reshape-design.txt -inventory -lib.c -makedist -managemon.c -mapfile.c -maps.c -md.4 -md5.h -md_p.h -md_u.h -mdadm.8.in -mdadm.c -mdadm.conf-example -mdadm.conf.5 -mdadm.h -mdadm.spec -mdmon-design.txt -mdmon.8 -mdmon.c -mdmon.h -mdopen.c -mdstat.c -misc/ -misc/mdcheck -misc/syslog-events -mkinitramfs -monitor.c -msg.c -msg.h -part.h -platform-intel.c -platform-intel.h -policy.c -probe_roms.c -probe_roms.h -pwgr.c -raid5extend.c -raid6check.8 -raid6check.c -restripe.c -sg_io.c -sha1.c -sha1.h -super-ddf.c -super-gpt.c -super-intel.c -super-mbr.c -super0.c -super1.c -swap_super.c -sysfs.c -systemd/ -systemd/SUSE-mdadm_env.sh -systemd/mdadm-grow-continue@.service -systemd/mdadm-last-resort@.service -systemd/mdadm-last-resort@.timer -systemd/mdadm.shutdown -systemd/mdcheck_continue.service -systemd/mdcheck_continue.timer -systemd/mdcheck_start.service -systemd/mdcheck_start.timer -systemd/mdmon@.service -systemd/mdmonitor-oneshot.service -systemd/mdmonitor-oneshot.timer -systemd/mdmonitor.service -test -tests/ -tests/00linear -tests/00multipath -tests/00names -tests/00raid0 -tests/00raid1 -tests/00raid10 -tests/00raid4 -tests/00raid5 -tests/00raid6 -tests/00readonly -tests/01r1fail -tests/01r5fail -tests/01r5integ -tests/01raid6integ -tests/01replace -tests/02lineargrow -tests/02r1add -tests/02r1grow -tests/02r5grow -tests/02r6grow -tests/03assem-incr -tests/03r0assem -tests/03r5assem -tests/03r5assem-failed -tests/03r5assemV1 -tests/04r0update -tests/04r1update -tests/04r5swap -tests/04update-metadata -tests/04update-uuid -tests/05r1-add-internalbitmap -tests/05r1-add-internalbitmap-v1a -tests/05r1-add-internalbitmap-v1b -tests/05r1-add-internalbitmap-v1c -tests/05r1-bitmapfile -tests/05r1-failfast -tests/05r1-grow-external -tests/05r1-grow-internal -tests/05r1-grow-internal-1 
-tests/05r1-internalbitmap -tests/05r1-internalbitmap-v1a -tests/05r1-internalbitmap-v1b -tests/05r1-internalbitmap-v1c -tests/05r1-n3-bitmapfile -tests/05r1-re-add -tests/05r1-re-add-nosuper -tests/05r1-remove-internalbitmap -tests/05r1-remove-internalbitmap-v1a -tests/05r1-remove-internalbitmap-v1b -tests/05r1-remove-internalbitmap-v1c -tests/05r5-bitmapfile -tests/05r5-internalbitmap -tests/05r6-bitmapfile -tests/05r6tor0 -tests/06name -tests/06sysfs -tests/06wrmostly -tests/07autoassemble -tests/07autodetect -tests/07changelevelintr -tests/07changelevels -tests/07layouts -tests/07reshape5intr -tests/07revert-grow -tests/07revert-inplace -tests/07revert-shrink -tests/07testreshape5 -tests/09imsm-assemble -tests/09imsm-create-fail-rebuild -tests/09imsm-overlap -tests/10ddf-assemble-missing -tests/10ddf-create -tests/10ddf-create-fail-rebuild -tests/10ddf-fail-create-race -tests/10ddf-fail-readd -tests/10ddf-fail-readd-readonly -tests/10ddf-fail-spare -tests/10ddf-fail-stop-readd -tests/10ddf-fail-twice -tests/10ddf-fail-two-spares -tests/10ddf-geometry -tests/10ddf-incremental-wrong-order -tests/10ddf-sudden-degraded -tests/11spare-migration -tests/12imsm-r0_2d-grow-r0_3d -tests/12imsm-r0_2d-grow-r0_4d -tests/12imsm-r0_2d-grow-r0_5d -tests/12imsm-r0_3d-grow-r0_4d -tests/12imsm-r5_3d-grow-r5_4d -tests/12imsm-r5_3d-grow-r5_5d -tests/13imsm-r0_r0_2d-grow-r0_r0_4d -tests/13imsm-r0_r0_2d-grow-r0_r0_5d -tests/13imsm-r0_r0_3d-grow-r0_r0_4d -tests/13imsm-r0_r5_3d-grow-r0_r5_4d -tests/13imsm-r0_r5_3d-grow-r0_r5_5d -tests/13imsm-r5_r0_3d-grow-r5_r0_4d -tests/13imsm-r5_r0_3d-grow-r5_r0_5d -tests/14imsm-r0_3d-r5_3d-migrate-r5_4d-r5_4d -tests/14imsm-r0_3d_no_spares-migrate-r5_3d -tests/14imsm-r0_r0_2d-takeover-r10_4d -tests/14imsm-r10_4d-grow-r10_5d -tests/14imsm-r10_r5_4d-takeover-r0_2d -tests/14imsm-r1_2d-grow-r1_3d -tests/14imsm-r1_2d-takeover-r0_2d -tests/14imsm-r5_3d-grow-r5_5d-no-spares -tests/14imsm-r5_3d-migrate-r4_3d -tests/15imsm-r0_3d_64k-migrate-r0_3d_256k 
-tests/15imsm-r5_3d_4k-migrate-r5_3d_256k -tests/15imsm-r5_3d_64k-migrate-r5_3d_256k -tests/15imsm-r5_6d_4k-migrate-r5_6d_256k -tests/15imsm-r5_r0_3d_64k-migrate-r5_r0_3d_256k -tests/16imsm-r0_3d-migrate-r5_4d -tests/16imsm-r0_5d-migrate-r5_6d -tests/16imsm-r5_3d-migrate-r0_3d -tests/16imsm-r5_5d-migrate-r0_5d -tests/18imsm-1d-takeover-r0_1d -tests/18imsm-1d-takeover-r1_2d -tests/18imsm-r0_2d-takeover-r10_4d -tests/18imsm-r10_4d-takeover-r0_2d -tests/18imsm-r1_2d-takeover-r0_1d -tests/19raid6auto-repair -tests/19raid6check -tests/19raid6repair -tests/19repair-does-not-destroy -tests/20raid5journal -tests/21raid5cache -tests/ToTest -tests/env-ddf-template -tests/env-imsm-template -tests/func.sh -tests/imsm-grow-template -tests/utils -udev-md-clustered-confirm-device.rules -udev-md-raid-arrays.rules -udev-md-raid-assembly.rules -udev-md-raid-creating.rules -udev-md-raid-safe-timeouts.rules -util.c -uuid.c -xmalloc.c diff --git a/makedist b/makedist deleted file mode 100755 index 0c4b39e..0000000 --- a/makedist +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/sh -# avoid silly sorting -export LANG=C -arg=$1 -target=~/public_html/source/mdadm -if [ " $arg" = " test" ] -then - target=/tmp/mdadm-test - rm -rf $target - mkdir -p $target -fi -if [ -d $target ] -then : -else echo $target is not a directory - exit 2 -fi -set `grep '^#define VERSION' ReadMe.c ` -version=`echo $3 | sed -e 's/"//g'` -grep "^.TH MDADM 8 .. v$version" mdadm.8.in > /dev/null 2>&1 || - { - echo mdadm.8.in does not mention version $version. - exit 1 - } -grep "^.TH MDMON 8 .. v$version" mdmon.8 > /dev/null 2>&1 || - { - echo mdmon.8 does not mention version $version. - exit 1 - } -rpmv=`echo $version | tr - _` -grep "^Version: *$rpmv$" mdadm.spec > /dev/null 2>&1 || - { - echo mdadm.spec does not mention version $version. 
- exit 1 - } -if [ -f ANNOUNCE-$version ] -then : -else - echo ANNOUNCE-$version does not exist - exit 1 -fi -if grep "^ANNOUNCE-$version\$" inventory -then : -else { cat inventory ; echo ANNOUNCE-$version ; } | sort -o inventory -fi - -echo version = $version -base=mdadm-$rpmv.tar.gz -if [ " $arg" != " diff" ] -then - if [ -f $target/$base ] - then - echo $target/$base exists. - exit 1 - fi - trap "rm $target/$base; exit" 1 2 3 - git archive --prefix=mdadm-$rpmv/ HEAD | gzip --best > $target/$base - chmod a+r $target/$base - ls -l $target/$base - if tar tzf $target/$base | sed 's,[^/]*/,,' | sort | diff -u inventory - - then : correct files found - else echo "Extra files, or inventory is out-of-date" - rm $target/$base - exit 1 - fi - rpmbuild -ta $target/$base || exit 1 - find ~/rpmbuild/RPMS -name "*mdadm-$version-*" \ - -exec cp {} $target/RPM \; - cp ANNOUNCE-$version $target/ANNOUNCE - cp ChangeLog $target/ChangeLog - if [ " $arg" != " test" ] - then - echo -n "Confirm signing this release? " - read a - if [ " $a" != " y" ]; then echo OK - bye. ; exit 1; fi - if zcat $target/$base | gpg -ba > $target/$base.sign && gpg -ba $target/ANNOUNCE - then - kup put $target/$base $target/$base.sign \ - /pub/linux/utils/raid/mdadm/mdadm-$version.tar.gz - kup put $target/ANNOUNCE $target/ANNOUNCE.asc /pub/linux/utils/raid/mdadm/ANNOUNCE - else - echo signing failed - exit 1 - fi - fi -else - if [ ! -f $target/$base ] - then - echo $target/$base does not exist. - exit 1 - fi - ( cd .. 
; ln -s mdadm.v2 mdadm-$version ; tar chf - --exclude=.git --exclude="TAGS" --exclude='*,v' --exclude='*~' --exclude='*.o' --exclude mdadm --exclude=mdadm'.[^ch0-9]' --exclude=RCS mdadm-$version ; rm mdadm-$version ) | gzip --best > /var/tmp/mdadm-new.tgz - mkdir /var/tmp/mdadm-old ; zcat $target/$base | ( cd /var/tmp/mdadm-old ; tar xf - ) - mkdir /var/tmp/mdadm-new ; zcat /var/tmp/mdadm-new.tgz | ( cd /var/tmp/mdadm-new ; tar xf - ) - diff -ru /var/tmp/mdadm-old /var/tmp/mdadm-new - rm -rf /var/tmp/mdadm-old /var/tmp/mdadm-new /var/tmp/mdadm-new.tgz -fi diff --git a/mdadm.8.in b/mdadm.8.in index 96a4a08..9ba6682 100644 --- a/mdadm.8.in +++ b/mdadm.8.in @@ -3179,7 +3179,7 @@ environment. This can be useful for testing or for disaster recovery. You should be aware that interoperability may be compromised by setting this value. -These change can also be suppressed by adding +These change can also be suppressed by adding .B mdadm.imsm.test=1 to the kernel command line. This makes it easy to test IMSM code in a virtual machine that doesn't have IMSM virtual hardware. @@ -3454,6 +3454,25 @@ is any string. These names are supported by since version 3.3 provided they are enabled in .IR mdadm.conf . +.SH UNDERSTANDING OUTPUT + +.TP +EXAMINE + +.TP +.B checkpoint +Checkpoint value is reported when array is performing some action including +resync, recovery or reshape. Checkpoints allow resuming action from certain +point if it was interrupted. + +Checkpoint is reported as combination of two values: current migration unit +and number of blocks per unit. By multiplying those values and dividing by +array size checkpoint progress percentage can be obtained in relation to +current progress reported in /proc/mdstat. Checkpoint is also related to (and +sometimes based on) sysfs entry sync_completed but depending on action units +may differ. Even if units are the same, it should not be expected that +checkpoint and sync_completed will be exact match nor updated simultaneously. 
+ .SH NOTE .I mdadm was previously known as diff --git a/mdadm.conf-example b/mdadm.conf-example deleted file mode 100644 index 35a75d1..0000000 --- a/mdadm.conf-example +++ /dev/null @@ -1,65 +0,0 @@ -# mdadm configuration file -# -# mdadm will function properly without the use of a configuration file, -# but this file is useful for keeping track of arrays and member disks. -# In general, a mdadm.conf file is created, and updated, after arrays -# are created. This is the opposite behavior of /etc/raidtab which is -# created prior to array construction. -# -# -# the config file takes two types of lines: -# -# DEVICE lines specify a list of devices of where to look for -# potential member disks -# -# ARRAY lines specify information about how to identify arrays so -# so that they can be activated -# -# You can have more than one device line and use wild cards. The first -# example includes SCSI the first partition of SCSI disks /dev/sdb, -# /dev/sdc, /dev/sdd, /dev/sdj, /dev/sdk, and /dev/sdl. The second -# line looks for array slices on IDE disks. -# -#DEVICE /dev/sd[bcdjkl]1 -#DEVICE /dev/hda1 /dev/hdb1 -# -# If you mount devfs on /dev, then a suitable way to list all devices is: -#DEVICE /dev/discs/*/* -# -# -# The AUTO line can control which arrays get assembled by auto-assembly, -# meaing either "mdadm -As" when there are no 'ARRAY' lines in this file, -# or "mdadm --incremental" when the array found is not listed in this file. -# By default, all arrays that are found are assembled. -# If you want to ignore all DDF arrays (maybe they are managed by dmraid), -# and only assemble 1.x arrays if which are marked for 'this' homehost, -# but assemble all others, then use -#AUTO -ddf homehost -1.x +all -# -# ARRAY lines specify an array to assemble and a method of identification. -# Arrays can currently be identified by using a UUID, superblock minor number, -# or a listing of devices. 
-# -# super-minor is usually the minor number of the metadevice -# UUID is the Universally Unique Identifier for the array -# Each can be obtained using -# -# mdadm -D -# -#ARRAY /dev/md0 UUID=3aaa0122:29827cfa:5331ad66:ca767371 -#ARRAY /dev/md1 super-minor=1 -#ARRAY /dev/md2 devices=/dev/hda1,/dev/hdb1 -# -# ARRAY lines can also specify a "spare-group" for each array. mdadm --monitor -# will then move a spare between arrays in a spare-group if one array has a failed -# drive but no spare -#ARRAY /dev/md4 uuid=b23f3c6d:aec43a9f:fd65db85:369432df spare-group=group1 -#ARRAY /dev/md5 uuid=19464854:03f71b1b:e0df2edd:246cc977 spare-group=group1 -# -# When used in --follow (aka --monitor) mode, mdadm needs a -# mail address and/or a program. This can be given with "mailaddr" -# and "program" lines to that monitoring can be started using -# mdadm --follow --scan & echo $! > /run/mdadm/mon.pid -# If the lines are not found, mdadm will exit quietly -#MAILADDR root@mydomain.tld -#PROGRAM /usr/sbin/handle-mdadm-events diff --git a/mdadm.conf.5.in b/mdadm.conf.5.in index 787e51e..14302a9 100644 --- a/mdadm.conf.5.in +++ b/mdadm.conf.5.in @@ -636,6 +636,20 @@ If multiple .B MINITORDELAY lines are provided, only first non-zero value is considered. +.TP +.B ENCRYPTION_NO_VERIFY +The +.B ENCRYPTION_NO_VERIFY +disables encryption verification for devices with particular encryption support detected. +Currently, only verification of SATA OPAL encryption can be disabled. +It does not disable ATA security encryption verification. +Currently effective only for +.I IMSM +metadata. +Available parameter +.I "sata_opal". 
+ + .SH FILES .SS {CONFFILE} @@ -744,6 +758,8 @@ SYSFS uuid=bead5eb6:31c17a27:da120ba2:7dfda40d group_thread_cnt=4 sync_speed_max=1000000 .br MONITORDELAY 60 +.br +ENCRYPTION_NO_VERIFY sata_opal .SH SEE ALSO .BR mdadm (8), diff --git a/mdadm.h b/mdadm.h index 1f28b3e..2640b39 100644 --- a/mdadm.h +++ b/mdadm.h @@ -430,8 +430,10 @@ struct createinfo { }; struct spare_criteria { + bool criteria_set; unsigned long long min_size; unsigned int sector_size; + struct dev_policy *pols; }; typedef enum mdadm_status { @@ -776,6 +778,8 @@ enum sysfs_read_flags { #define SYSFS_MAX_BUF_SIZE 64 +extern void sysfs_get_container_devnm(struct mdinfo *mdi, char *buf); + /* If fd >= 0, get the array it is open on, * else use devnm. */ @@ -807,7 +811,7 @@ extern int sysfs_attribute_available(struct mdinfo *sra, struct mdinfo *dev, extern int sysfs_get_str(struct mdinfo *sra, struct mdinfo *dev, char *name, char *val, int size); extern int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms); -extern int sysfs_set_array(struct mdinfo *info, int vers); +extern int sysfs_set_array(struct mdinfo *info); extern int sysfs_add_disk(struct mdinfo *sra, struct mdinfo *sd, int resume); extern int sysfs_disk_to_scsi_id(int fd, __u32 *id); extern int sysfs_unique_holder(char *devnm, long rdev); @@ -849,6 +853,7 @@ extern int restore_stripes(int *dest, unsigned long long *offsets, int source, unsigned long long read_offset, unsigned long long start, unsigned long long length, char *src_buf); +extern bool sysfs_is_libata_allow_tpm_enabled(const int verbose); #ifndef Sendmail #define Sendmail "/usr/lib/sendmail -t" @@ -937,6 +942,23 @@ struct reshape { unsigned long long new_size; /* New size of array in sectors */ }; +/** + * struct dev_policy - Data structure for policy management. + * @next: pointer to next dev_policy. + * @name: policy name, category. + * @metadata: the metadata type it affects. + * @value: value of the policy. 
+ * + * The functions to manipulate dev_policy lists do not free elements, so they must be statically + * allocated. @name and @metadata can be compared by address. + */ +typedef struct dev_policy { + struct dev_policy *next; + char *name; + const char *metadata; + const char *value; +} dev_policy_t; + /* A superswitch provides entry point to a metadata handler. * * The superswitch primarily operates on some "metadata" that @@ -1115,10 +1137,9 @@ extern struct superswitch { * Return spare criteria for array: * - minimum disk size can be used in array; * - sector size can be used in array. - * Return values: 0 - for success and -EINVAL on error. */ - int (*get_spare_criteria)(struct supertype *st, - struct spare_criteria *sc); + mdadm_status_t (*get_spare_criteria)(struct supertype *st, char *mddev_path, + struct spare_criteria *sc); /* Find somewhere to put a bitmap - possibly auto-size it - and * update the metadata to record this. The array may be newly * created, in which case data_size may be updated, or it might @@ -1166,6 +1187,25 @@ extern struct superswitch { char *subdev, unsigned long long *freesize, int consistency_policy, int verbose); + /** + * test_and_add_drive_policies() - test new and add custom policies from metadata handler. + * @pols: list of currently recorded policies. + * @disk_fd: file descriptor of the device to check. + * @verbose: verbose flag. + * + * Used by IMSM to verify all drives in container/array, against requirements not recorded + * in superblock, like controller type for IMSM. It should check all drives even if + * they are not actually used, because mdmon or kernel are free to use any drive assigned to + * container automatically. + * + * Generating and comparison methods belong to metadata handler. It is not mandatory to be + * implemented. + * + * Return: MDADM_STATUS_SUCCESS is expected on success. 
+ */ + mdadm_status_t (*test_and_add_drive_policies)(dev_policy_t **pols, int disk_fd, + const int verbose); + /* Return a linked list of 'mdinfo' structures for all arrays * in the container. For non-containers, it is like * getinfo_super with an allocated mdinfo.*/ @@ -1247,21 +1287,6 @@ extern struct superswitch { */ struct mdinfo *(*activate_spare)(struct active_array *a, struct metadata_update **updates); - /* - * Return statically allocated string that represents metadata specific - * controller domain of the disk. The domain is used in disk domain - * matching functions. Disks belong to the same domain if the they have - * the same domain from mdadm.conf and belong the same metadata domain. - * Returning NULL or not providing this handler means that metadata - * does not distinguish the differences between disks that belong to - * different controllers. They are in the domain specified by - * configuration file (mdadm.conf). - * In case when the metadata has the notion of domains based on disk - * it shall return NULL for disks that do not belong to the controller - * the supported domains. Such disks will form another domain and won't - * be mixed with supported ones. - */ - const char *(*get_disk_controller_domain)(const char *path); /* for external backup area */ int (*recover_backup)(struct supertype *st, struct mdinfo *info); @@ -1368,27 +1393,8 @@ extern struct supertype *dup_super(struct supertype *st); extern int get_dev_size(int fd, char *dname, unsigned long long *sizep); extern int get_dev_sector_size(int fd, char *dname, unsigned int *sectsizep); extern int must_be_container(int fd); -extern int dev_size_from_id(dev_t id, unsigned long long *size); -extern int dev_sector_size_from_id(dev_t id, unsigned int *size); void wait_for(char *dev, int fd); -/* - * Data structures for policy management. - * Each device can have a policy structure that lists - * various name/value pairs each possibly with a metadata associated. 
- * The policy list is sorted by name/value/metadata - */ -struct dev_policy { - struct dev_policy *next; - char *name; /* None of these strings are allocated. They are - * all just references to strings which are known - * to exist elsewhere. - * name and metadata can be compared by address equality. - */ - const char *metadata; - const char *value; -}; - extern char pol_act[], pol_domain[], pol_metadata[], pol_auto[]; /* iterate over the sublist starting at list, having the same @@ -1430,10 +1436,16 @@ extern struct dev_policy *disk_policy(struct mdinfo *disk); extern struct dev_policy *devid_policy(int devid); extern void dev_policy_free(struct dev_policy *p); -//extern void pol_new(struct dev_policy **pol, char *name, char *val, char *metadata); extern void pol_add(struct dev_policy **pol, char *name, char *val, char *metadata); extern struct dev_policy *pol_find(struct dev_policy *pol, char *name); +extern mdadm_status_t drive_test_and_add_policies(struct supertype *st, dev_policy_t **pols, + int fd, const int verbose); +extern mdadm_status_t sysfs_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols, + struct mdinfo *mdi, const int verbose); +extern mdadm_status_t mddev_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols, + int array_fd, const int verbose); + enum policy_action { act_default, act_include, @@ -1661,6 +1673,7 @@ extern char *conf_get_program(void); extern char *conf_get_homehost(int *require_homehostp); extern char *conf_get_homecluster(void); extern int conf_get_monitor_delay(void); +extern bool conf_get_sata_opal_encryption_no_verify(void); extern char *conf_line(FILE *file); extern char *conf_word(FILE *file, int allow_key); extern void print_quoted(char *str); @@ -1685,8 +1698,7 @@ extern const int uuid_zero[4]; extern int same_uuid(int a[4], int b[4], int swapuuid); extern void copy_uuid(void *a, int b[4], int swapuuid); extern char *__fname_from_uuid(int id[4], int swap, char *buf, char sep); -extern 
char *fname_from_uuid(struct supertype *st, - struct mdinfo *info, char *buf, char sep); +extern char *fname_from_uuid(struct mdinfo *info, char *buf); extern unsigned long calc_csum(void *super, int bytes); extern int enough(int level, int raid_disks, int layout, int clean, char *avail); @@ -1708,6 +1720,9 @@ extern int assemble_container_content(struct supertype *st, int mdfd, #define INCR_UNSAFE 2 #define INCR_ALREADY 4 #define INCR_YES 8 + +extern bool devid_matches_criteria(struct supertype *st, dev_t devid, struct spare_criteria *sc); +extern bool disk_fd_matches_criteria(struct supertype *st, int disk_fd, struct spare_criteria *sc); extern struct mdinfo *container_choose_spares(struct supertype *st, struct spare_criteria *criteria, struct domainlist *domlist, @@ -1856,11 +1871,10 @@ static inline char *to_subarray(struct mdstat_ent *ent, char *container) */ static inline sighandler_t signal_s(int sig, sighandler_t handler) { - struct sigaction new_act; - struct sigaction old_act; + struct sigaction new_act = {0}; + struct sigaction old_act = {0}; new_act.sa_handler = handler; - new_act.sa_flags = 0; if (sigaction(sig, &new_act, &old_act) == 0) return old_act.sa_handler; @@ -1899,6 +1913,8 @@ static inline int xasprintf(char **strp, const char *fmt, ...) { #define pr_info(fmt, args...) printf("%s: "fmt, Name, ##args) +#define pr_vrb(fmt, arg...) 
((void)(verbose && pr_err(fmt, ##arg))) + void *xmalloc(size_t len); void *xrealloc(void *ptr, size_t len); void *xcalloc(size_t num, size_t size); diff --git a/mdadm.spec b/mdadm.spec deleted file mode 100644 index 12e7859..0000000 --- a/mdadm.spec +++ /dev/null @@ -1,47 +0,0 @@ -Summary: mdadm is used for controlling Linux md devices (aka RAID arrays) -Name: mdadm -Version: 4.3 -Release: 1 -Source: https://www.kernel.org/pub/linux/utils/raid/mdadm/mdadm-%{version}.tar.gz -URL: https://neil.brown.name/blog/mdadm -License: GPL -Group: Utilities/System -BuildRoot: %{_tmppath}/%{name}-root -Obsoletes: mdctl - -%description -mdadm is a program that can be used to create, manage, and monitor -Linux MD (Software RAID) devices. - -%prep -%setup -q -# we want to install in /sbin, not /usr/sbin... -%define _exec_prefix %{nil} - -%build -# This is a debatable issue. The author of this RPM spec file feels that -# people who install RPMs (especially given that the default RPM options -# will strip the binary) are not going to be running gdb against the -# program. 
-make CXFLAGS="$RPM_OPT_FLAGS" SYSCONFDIR="%{_sysconfdir}" - -%install -make DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} BINDIR=%{_sbindir} install -install -D -m644 mdadm.conf-example $RPM_BUILD_ROOT/%{_sysconfdir}/mdadm.conf - -%clean -rm -rf $RPM_BUILD_ROOT - -%files -%defattr(-,root,root) -%doc TODO ChangeLog mdadm.conf-example COPYING -%{_sbindir}/mdadm -%{_sbindir}/mdmon -/usr/lib/udev/rules.d/01-md-raid-creating.rules -/usr/lib/udev/rules.d/63-md-raid-arrays.rules -/usr/lib/udev/rules.d/64-md-raid-assembly.rules -/usr/lib/udev/rules.d/69-md-clustered-confirm-device.rules -%config(noreplace,missingok)/%{_sysconfdir}/mdadm.conf -%{_mandir}/man*/md* - -%changelog diff --git a/mdmon-design.txt b/mdmon-design.txt deleted file mode 100644 index f09184a..0000000 --- a/mdmon-design.txt +++ /dev/null @@ -1,146 +0,0 @@ - -When managing a RAID1 array which uses metadata other than the -"native" metadata understood by the kernel, mdadm makes use of a -partner program named 'mdmon' to manage some aspects of updating -that metadata and synchronising the metadata with the array state. - -This document provides some details on how mdmon works. - -Containers ----------- - -As background: mdadm makes a distinction between an 'array' and a -'container'. Other sources sometimes use the term 'volume' or -'device' for an 'array', and may use the term 'array' for a -'container'. - -For our purposes: - - a 'container' is a collection of devices which are described by a - single set of metadata. The metadata may be stored equally - on all devices, or different devices may have quite different - subsets of the total metadata. But there is conceptually one set - of metadata that unifies the devices. - - - an 'array' is a set of datablock from various devices which - together are used to present the abstraction of a single linear - sequence of block, which may provide data redundancy or enhanced - performance. 
- -So a container has some metadata and provides a number of arrays which -are described by that metadata. - -Sometimes this model doesn't work perfectly. For example, global -spares may have their own metadata which is quite different from the -metadata from any device that participates in one or more arrays. -Such a global spare might still need to belong to some container so -that it is available to be used should a failure arise. In that case -we consider the 'metadata' to be the union of the metadata on the -active devices which describes the arrays, and the metadata on the -global spares which only describes the spares. In this case different -devices in the one container will have quite different metadata. - - -Purpose -------- - -The main purpose of mdmon is to update the metadata in response to -changes to the array which need to be reflected in the metadata before -futures writes to the array can safely be performed. -These include: - - transitions from 'clean' to 'dirty'. - - recording the devices have failed. - - recording the progress of a 'reshape' - -This requires mdmon to be running at any time that the array is -writable (a read-only array does not require mdmon to be running). - -Because mdmon must be able to process these metadata updates at any -time, it must (when running) have exclusive write access to the -metadata. Any other changes (e.g. reconfiguration of the array) must -go through mdmon. - -A secondary role for mdmon is to activate spares when a device fails. -This role is much less time-critical than the other metadata updates, -so it could be performed by a separate process, possibly -"mdadm --monitor" which has a related role of moving devices between -arrays. A main reason for including this functionality in mdmon is -that in the native-metadata case this function is handled in the -kernel, and mdmon's reason for existence to provide functionality -which is otherwise handled by the kernel. 
- - -Design overview ---------------- - -mdmon is structured as two threads with a common address space and -common data structures. These threads are know as the 'monitor' and -the 'manager'. - -The 'monitor' has the primary role of monitoring the array for -important state changes and updating the metadata accordingly. As -writes to the array can be blocked until 'monitor' completes and -acknowledges the update, it much be very careful not to block itself. -In particular it must not block waiting for any write to complete else -it could deadlock. This means that it must not allocate memory as -doing this can require dirty memory to be written out and if the -system choose to write to the array that mdmon is monitoring, the -memory allocation could deadlock. - -So 'monitor' must never allocate memory and must limit the number of -other system call it performs. It may: - - use select (or poll) to wait for activity on a file descriptor - - read from a sysfs file descriptor - - write to a sysfs file descriptor - - write the metadata out to the block devices using O_DIRECT - - send a signal (kill) to the manager thread - -It must not e.g. open files or do anything similar that might allocate -resources. - -The 'manager' thread does everything else that is needed. If any -files are to be opened (e.g. because a device has been added to the -array), the manager does that. If any memory needs to be allocated -(e.g. to hold data about a new array as can happen when one set of -metadata describes several arrays), the manager performs that -allocation. - -The 'manager' is also responsible for communicating with mdadm and -assigning spares to replace failed devices. - - -Handling metadata updates -------------------------- - -There are a number of cases in which mdadm needs to update the -metdata which mdmon is managing. These include: - - creating a new array in an active container - - adding a device to a container - - reconfiguring an array -etc. 
- -To complete these updates, mdadm must send a message to mdmon which -will merge the update into the metadata as it is at that moment. - -To achieve this, mdmon creates a Unix Domain Socket which the manager -thread listens on. mdadm sends a message over this socket. The -manager thread examines the message to see if it will require -allocating any memory and allocates it. This is done in the -'prepare_update' metadata method. - -The update message is then queued for handling by the monitor thread -which it will do when convenient. The monitor thread calls -->process_update which should atomically make the required changes to -the metadata, making use of the pre-allocate memory as required. Any -memory the is no-longer needed can be placed back in the request and -the manager thread will free it. - -The exact format of a metadata update is up to the implementer of the -metadata handlers. It will simply describe a change that needs to be -made. It will sometimes contain fragments of the metadata to be -copied in to place. However the ->process_update routine must make -sure not to over-write any field that the monitor thread might have -updated, such as a 'device failed' or 'array is dirty' state. - -When the monitor thread has completed the update and written it to the -devices, an acknowledgement message is sent back over the socket so -that mdadm knows it is complete. 
diff --git a/mdmon.c b/mdmon.c index a2038fe..5fdb5cd 100644 --- a/mdmon.c +++ b/mdmon.c @@ -302,12 +302,12 @@ static int mdmon(char *devnm, int must_fork, int takeover); int main(int argc, char *argv[]) { char *container_name = NULL; - char *devnm = NULL; int status = 0; int opt; int all = 0; int takeover = 0; int dofork = 1; + int mdfd = -1; bool help = false; static struct option options[] = { {"all", 0, NULL, 'a'}, @@ -410,19 +410,20 @@ int main(int argc, char *argv[]) free_mdstat(mdstat); return status; - } else { - int mdfd = open_mddev(container_name, 0); - devnm = fd2devnm(mdfd); + } + + mdfd = open_mddev(container_name, 0); + if (is_fd_valid(mdfd)) { + char *devnm = fd2devnm(mdfd); close(mdfd); - } - if (!devnm) { - pr_err("%s is not a valid md device name\n", - container_name); - return 1; + if (devnm) + return mdmon(devnm, dofork && do_fork(), takeover); } - return mdmon(devnm, dofork && do_fork(), takeover); + + pr_err("%s is not a valid md device name\n", container_name); + return 1; } static int mdmon(char *devnm, int must_fork, int takeover) diff --git a/mkinitramfs b/mkinitramfs deleted file mode 100644 index c6275dd..0000000 --- a/mkinitramfs +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/sh - -# make sure we are being run in the right directory... -if [ -f mkinitramfs ] -then : -else - echo >&2 mkinitramfs must be run from the mdadm source directory. - exit 1 -fi -if [ -f /bin/busybox ] -then : good, it exists - case `file /bin/busybox` in - *statically* ) : good ;; - * ) echo >&2 mkinitramfs: /bin/busybox is not statically linked: cannot proceed. - exit 1 - esac -else - echo >&2 "mkinitramfs: /bin/busybox doesn't exist - please install it statically linked." 
- exit 1 -fi - -rm -rf initramfs -mkdir initramfs -mkdir initramfs/bin -make mdadm.static -cp mdadm.static initramfs/bin/mdadm -cp /bin/busybox initramfs/bin/busybox -ln initramfs/bin/busybox initramfs/bin/sh -cat <<- END > initramfs/init - #!/bin/sh - - echo 'Auto-assembling boot md array' - mkdir /proc - mount -t proc proc /proc - if [ -n "$rootuuid" ] - then arg=--uuid=$rootuuid - elif [ -n "$mdminor" ] - then arg=--super-minor=$mdminor - else arg=--super-minor=0 - fi - echo "Using $arg" - mdadm -Acpartitions $arg --auto=part /dev/mda - cd / - mount /dev/mda1 /root || mount /dev/mda /root - umount /proc - cd /root - exec chroot . /sbin/init < /dev/console > /dev/console 2>&1 -END -chmod +x initramfs/init - -(cd initramfs - find init bin | cpio -o -H newc | gzip --best -) > init.cpio.gz -rm -rf initramfs -ls -l init.cpio.gz diff --git a/monitor.c b/monitor.c index 4acec67..be0bec7 100644 --- a/monitor.c +++ b/monitor.c @@ -412,6 +412,7 @@ static int read_and_act(struct active_array *a, fd_set *fds) int ret = 0; int count = 0; struct timeval tv; + bool write_checkpoint = false; a->next_state = bad_word; a->next_action = bad_action; @@ -564,52 +565,40 @@ static int read_and_act(struct active_array *a, fd_set *fds) } } - /* Check for recovery checkpoint notifications. We need to be a - * minimum distance away from the last checkpoint to prevent - * over checkpointing. Note reshape checkpointing is handled - * in the second branch. - */ - if (sync_completed > a->last_checkpoint && - sync_completed - a->last_checkpoint > a->info.component_size >> 4 && - a->curr_action > reshape) { - /* A (non-reshape) sync_action has reached a checkpoint. 
- * Record the updated position in the metadata - */ - a->last_checkpoint = sync_completed; - a->container->ss->set_array_state(a, a->curr_state <= clean); - } else if ((a->curr_action == idle && a->prev_action == reshape) || - (a->curr_action == reshape && - sync_completed > a->last_checkpoint)) { - /* Reshape has progressed or completed so we need to - * update the array state - and possibly the array size - */ + /* Update reshape checkpoint, depending if it finished or progressed */ + if (a->curr_action == idle && a->prev_action == reshape) { + char buf[SYSFS_MAX_BUF_SIZE]; + if (sync_completed != 0) a->last_checkpoint = sync_completed; - /* We might need to update last_checkpoint depending on - * the reason that reshape finished. - * if array reshape is really finished: - * set check point to the end, this allows - * set_array_state() to finalize reshape in metadata - * if reshape if broken: do not set checkpoint to the end - * this allows for reshape restart from checkpoint + + /* + * If reshape really finished, set checkpoint to the end to finalize it. + * Do not set checkpoint if reshape is broken. + * Reshape will restart from last checkpoint. 
*/ - if ((a->curr_action != reshape) && - (a->prev_action == reshape)) { - char buf[SYSFS_MAX_BUF_SIZE]; - if ((sysfs_get_str(&a->info, NULL, - "reshape_position", - buf, - sizeof(buf)) >= 0) && - str_is_none(buf) == true) + if (sysfs_get_str(&a->info, NULL, "reshape_position", buf, sizeof(buf)) >= 0) + if (str_is_none(buf) == true) a->last_checkpoint = a->info.component_size; - } - a->container->ss->set_array_state(a, a->curr_state <= clean); - a->last_checkpoint = sync_completed; + + write_checkpoint = true; } - if (sync_completed > a->last_checkpoint) + if (a->curr_action >= reshape && sync_completed > a->last_checkpoint) { + /* Update checkpoint if neither reshape nor idle action */ a->last_checkpoint = sync_completed; + write_checkpoint = true; + } + + /* Save checkpoint */ + if (write_checkpoint) { + a->container->ss->set_array_state(a, a->curr_state <= clean); + + if (a->curr_action <= reshape) + a->last_checkpoint = sync_completed; + } + if (sync_completed >= a->info.component_size) a->last_checkpoint = 0; diff --git a/platform-intel.h b/platform-intel.h index ce29d3d..3c2bc59 100644 --- a/platform-intel.h +++ b/platform-intel.h @@ -262,7 +262,6 @@ int disk_attached_to_hba(int fd, const char *hba_path); int devt_attached_to_hba(dev_t dev, const char *hba_path); char *devt_to_devpath(dev_t dev, int dev_level, char *buf); int path_attached_to_hba(const char *disk_path, const char *hba_path); -const char *get_sys_dev_type(enum sys_dev_type); const struct orom_entry *get_orom_entry_by_device_id(__u16 dev_id); const struct imsm_orom *get_orom_by_device_id(__u16 device_id); struct sys_dev *device_by_id(__u16 device_id); diff --git a/policy.c b/policy.c index eee9ef6..dfaafdc 100644 --- a/policy.c +++ b/policy.c @@ -365,7 +365,6 @@ struct dev_policy *path_policy(char **paths, char *type) { struct pol_rule *rules; struct dev_policy *pol = NULL; - int i; rules = config_rules; @@ -380,23 +379,104 @@ struct dev_policy *path_policy(char **paths, char *type) rules = 
rules->next; } - /* Now add any metadata-specific internal knowledge - * about this path - */ - for (i=0; paths && paths[0] && superlist[i]; i++) - if (superlist[i]->get_disk_controller_domain) { - const char *d = - superlist[i]->get_disk_controller_domain( - paths[0]); - if (d) - pol_new(&pol, pol_domain, d, superlist[i]->name); - } - pol_sort(&pol); pol_dedup(pol); return pol; } +/** + * drive_test_and_add_policies() - get policies for drive and add them to pols. + * @st: supertype. + * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL. + * @fd: device descriptor. + * @verbose: verbose flag. + * + * If supertype doesn't support this functionality return success. Use metadata handler to get + * policies. + */ +mdadm_status_t drive_test_and_add_policies(struct supertype *st, dev_policy_t **pols, int fd, + const int verbose) +{ + if (!st->ss->test_and_add_drive_policies) + return MDADM_STATUS_SUCCESS; + + if (st->ss->test_and_add_drive_policies(pols, fd, verbose) == MDADM_STATUS_SUCCESS) { + /* After successful call list cannot be empty */ + assert(*pols); + return MDADM_STATUS_SUCCESS; + } + + return MDADM_STATUS_ERROR; +} + +/** + * sysfs_test_and_add_drive_policies() - get policies for mddev and add them to pols. + * @st: supertype. + * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL. + * @mdi: mdinfo describes the MD array, must have GET_DEVS option. + * @verbose: verbose flag. + * + * If supertype doesn't support this functionality return success. To get policies, all disks + * connected to mddev are analyzed. 
+ */ +mdadm_status_t sysfs_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols, + struct mdinfo *mdi, const int verbose) +{ + struct mdinfo *sd; + + if (!st->ss->test_and_add_drive_policies) + return MDADM_STATUS_SUCCESS; + + for (sd = mdi->devs; sd; sd = sd->next) { + char *devpath = map_dev(sd->disk.major, sd->disk.minor, 0); + int fd = dev_open(devpath, O_RDONLY); + int rv; + + if (!is_fd_valid(fd)) { + pr_err("Cannot open fd for %s\n", devpath); + return MDADM_STATUS_ERROR; + } + + rv = drive_test_and_add_policies(st, pols, fd, verbose); + close(fd); + + if (rv) + return MDADM_STATUS_ERROR; + } + + return MDADM_STATUS_SUCCESS; +} + +/** + * mddev_test_and_add_policies() - get policies for mddev and add them to pols. + * @st: supertype. + * @pols: pointer to pointer of first list entry, cannot be NULL, may point to NULL. + * @array_fd: MD device descriptor. + * @verbose: verbose flag. + * + * If supertype doesn't support this functionality return success. Use fd to extract disks. 
+ */ +mdadm_status_t mddev_test_and_add_drive_policies(struct supertype *st, dev_policy_t **pols, + int array_fd, const int verbose) +{ + struct mdinfo *sra; + int ret; + + if (!st->ss->test_and_add_drive_policies) + return MDADM_STATUS_SUCCESS; + + sra = sysfs_read(array_fd, NULL, GET_DEVS); + if (!sra) { + pr_err("Cannot load sysfs for %s\n", fd2devnm(array_fd)); + return MDADM_STATUS_ERROR; + } + + ret = sysfs_test_and_add_drive_policies(st, pols, sra, verbose); + + sysfs_free(sra); + return ret; +} + void pol_add(struct dev_policy **pol, char *name, char *val, char *metadata) @@ -679,7 +759,6 @@ int domain_test(struct domainlist *dom, struct dev_policy *pol, * 1: has domains, all match */ int found_any = -1; - int has_one_domain = 1; struct dev_policy *p; pol = pol_find(pol, pol_domain); @@ -689,9 +768,6 @@ int domain_test(struct domainlist *dom, struct dev_policy *pol, dom = dom->next; if (!dom || strcmp(dom->dom, p->value) != 0) return 0; - if (has_one_domain && metadata && strcmp(metadata, "imsm") == 0) - found_any = -1; - has_one_domain = 0; } return found_any; } diff --git a/super-ddf.c b/super-ddf.c index 7571e3b..21426c7 100644 --- a/super-ddf.c +++ b/super-ddf.c @@ -1617,7 +1617,7 @@ static void brief_examine_super_ddf(struct supertype *st, int verbose) struct mdinfo info; char nbuf[64]; getinfo_super_ddf(st, &info, NULL); - fname_from_uuid(st, &info, nbuf, ':'); + fname_from_uuid(&info, nbuf); printf("ARRAY metadata=ddf UUID=%s\n", nbuf + 5); } @@ -1632,7 +1632,7 @@ static void brief_examine_subarrays_ddf(struct supertype *st, int verbose) unsigned int i; char nbuf[64]; getinfo_super_ddf(st, &info, NULL); - fname_from_uuid(st, &info, nbuf, ':'); + fname_from_uuid(&info, nbuf); for (i = 0; i < be16_to_cpu(ddf->virt->max_vdes); i++) { struct virtual_entry *ve = &ddf->virt->entries[i]; @@ -1645,7 +1645,7 @@ static void brief_examine_subarrays_ddf(struct supertype *st, int verbose) ddf->currentconf =&vcl; vcl.vcnum = i; uuid_from_super_ddf(st, info.uuid); 
- fname_from_uuid(st, &info, nbuf1, ':'); + fname_from_uuid(&info, nbuf1); _ddf_array_name(namebuf, ddf, i); printf("ARRAY%s%s container=%s member=%d UUID=%s\n", namebuf[0] == '\0' ? "" : " " DEV_MD_DIR, namebuf, @@ -1658,7 +1658,7 @@ static void export_examine_super_ddf(struct supertype *st) struct mdinfo info; char nbuf[64]; getinfo_super_ddf(st, &info, NULL); - fname_from_uuid(st, &info, nbuf, ':'); + fname_from_uuid(&info, nbuf); printf("MD_METADATA=ddf\n"); printf("MD_LEVEL=container\n"); printf("MD_UUID=%s\n", nbuf+5); @@ -1798,7 +1798,7 @@ static void brief_detail_super_ddf(struct supertype *st, char *subarray) return; else uuid_of_ddf_subarray(ddf, vcnum, info.uuid); - fname_from_uuid(st, &info, nbuf,':'); + fname_from_uuid(&info, nbuf); printf(" UUID=%s", nbuf + 5); } @@ -5162,6 +5162,7 @@ struct superswitch super_ddf = { .default_geometry = default_geometry_ddf, .external = 1, + .swapuuid = 0, /* for mdmon */ .open_new = ddf_open_new, diff --git a/super-intel.c b/super-intel.c index dbea235..1faab60 100644 --- a/super-intel.c +++ b/super-intel.c @@ -27,6 +27,7 @@ #include #include #include +#include "drive_encryption.h" /* MPB == Metadata Parameter Block */ #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. " @@ -393,8 +394,6 @@ struct md_list { struct md_list *next; }; -#define pr_vrb(fmt, arg...) (void) (verbose && pr_err(fmt, ##arg)) - static __u8 migr_type(struct imsm_dev *dev) { if (dev->vol.migr_type == MIGR_VERIFY && @@ -1736,48 +1735,6 @@ static __u32 imsm_min_reserved_sectors(struct intel_super *super) return (remainder < rv) ? 
remainder : rv; } -/* - * Return minimum size of a spare and sector size - * that can be used in this array - */ -int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c) -{ - struct intel_super *super = st->sb; - struct dl *dl; - struct extent *e; - int i; - unsigned long long size = 0; - - c->min_size = 0; - c->sector_size = 0; - - if (!super) - return -EINVAL; - /* find first active disk in array */ - dl = super->disks; - while (dl && (is_failed(&dl->disk) || dl->index == -1)) - dl = dl->next; - if (!dl) - return -EINVAL; - /* find last lba used by subarrays */ - e = get_extents(super, dl, 0); - if (!e) - return -EINVAL; - for (i = 0; e[i].size; i++) - continue; - if (i > 0) - size = e[i-1].start + e[i-1].size; - free(e); - - /* add the amount of space needed for metadata */ - size += imsm_min_reserved_sectors(super); - - c->min_size = size * 512; - c->sector_size = super->sector_size; - - return 0; -} - static bool is_gen_migration(struct imsm_dev *dev); #define IMSM_4K_DIV 8 @@ -2259,7 +2216,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost) else printf("not supported\n"); getinfo_super_imsm(st, &info, NULL); - fname_from_uuid(st, &info, nbuf, ':'); + fname_from_uuid(&info, nbuf); printf(" UUID : %s\n", nbuf + 5); sum = __le32_to_cpu(mpb->check_sum); printf(" Checksum : %08x %s\n", sum, @@ -2284,7 +2241,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost) super->current_vol = i; getinfo_super_imsm(st, &info, NULL); - fname_from_uuid(st, &info, nbuf, ':'); + fname_from_uuid(&info, nbuf); print_imsm_dev(super, dev, nbuf + 5, super->disks->index); } for (i = 0; i < mpb->num_disks; i++) { @@ -2309,7 +2266,7 @@ static void brief_examine_super_imsm(struct supertype *st, int verbose) char nbuf[64]; getinfo_super_imsm(st, &info, NULL); - fname_from_uuid(st, &info, nbuf, ':'); + fname_from_uuid(&info, nbuf); printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5); } @@ -2326,13 +2283,13 @@ static void 
brief_examine_subarrays_imsm(struct supertype *st, int verbose) return; getinfo_super_imsm(st, &info, NULL); - fname_from_uuid(st, &info, nbuf, ':'); + fname_from_uuid(&info, nbuf); for (i = 0; i < super->anchor->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); super->current_vol = i; getinfo_super_imsm(st, &info, NULL); - fname_from_uuid(st, &info, nbuf1, ':'); + fname_from_uuid(&info, nbuf1); printf("ARRAY " DEV_MD_DIR "%.16s container=%s member=%d UUID=%s\n", dev->volume, nbuf + 5, i, nbuf1 + 5); } @@ -2346,7 +2303,7 @@ static void export_examine_super_imsm(struct supertype *st) char nbuf[64]; getinfo_super_imsm(st, &info, NULL); - fname_from_uuid(st, &info, nbuf, ':'); + fname_from_uuid(&info, nbuf); printf("MD_METADATA=imsm\n"); printf("MD_LEVEL=container\n"); printf("MD_UUID=%s\n", nbuf+5); @@ -2366,7 +2323,7 @@ static void detail_super_imsm(struct supertype *st, char *homehost, super->current_vol = strtoul(subarray, NULL, 10); getinfo_super_imsm(st, &info, NULL); - fname_from_uuid(st, &info, nbuf, ':'); + fname_from_uuid(&info, nbuf); printf("\n UUID : %s\n", nbuf + 5); super->current_vol = temp_vol; @@ -2383,7 +2340,7 @@ static void brief_detail_super_imsm(struct supertype *st, char *subarray) super->current_vol = strtoul(subarray, NULL, 10); getinfo_super_imsm(st, &info, NULL); - fname_from_uuid(st, &info, nbuf, ':'); + fname_from_uuid(&info, nbuf); printf(" UUID=%s", nbuf + 5); super->current_vol = temp_vol; @@ -2393,12 +2350,41 @@ static int imsm_read_serial(int fd, char *devname, __u8 *serial, size_t serial_buf_len); static void fd2devname(int fd, char *name); -static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose) +void print_encryption_information(int disk_fd, enum sys_dev_type hba_type) +{ + struct encryption_information information = {0}; + mdadm_status_t status = MDADM_STATUS_SUCCESS; + const char *indent = " "; + + switch (hba_type) { + case SYS_DEV_VMD: + case SYS_DEV_NVME: + status = 
get_nvme_opal_encryption_information(disk_fd, &information, 1); + break; + case SYS_DEV_SATA: + case SYS_DEV_SATA_VMD: + status = get_ata_encryption_information(disk_fd, &information, 1); + break; + default: + return; + } + + if (status) { + pr_err("Failed to get drive encryption information.\n"); + return; + } + + printf("%sEncryption(Ability|Status): %s|%s\n", indent, + get_encryption_ability_string(information.ability), + get_encryption_status_string(information.status)); +} + +static int ahci_enumerate_ports(struct sys_dev *hba, int port_count, int host_base, int verbose) { /* dump an unsorted list of devices attached to AHCI Intel storage * controller, as well as non-connected ports */ - int hba_len = strlen(hba_path) + 1; + int hba_len = strlen(hba->path) + 1; struct dirent *ent; DIR *dir; char *path = NULL; @@ -2434,7 +2420,7 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b path = devt_to_devpath(makedev(major, minor), 1, NULL); if (!path) continue; - if (!path_attached_to_hba(path, hba_path)) { + if (!path_attached_to_hba(path, hba->path)) { free(path); path = NULL; continue; @@ -2537,6 +2523,8 @@ static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_b printf(" (%s)\n", buf); else printf(" ()\n"); + + print_encryption_information(fd, hba->type); close(fd); } free(path); @@ -2601,6 +2589,8 @@ static int print_nvme_info(struct sys_dev *hba) else printf("()\n"); + print_encryption_information(fd, hba->type); + skip: close_fd(&fd); } @@ -2856,7 +2846,7 @@ static int detail_platform_imsm(int verbose, int enumerate_only, char *controlle hba->path, get_sys_dev_type(hba->type)); if (hba->type == SYS_DEV_SATA || hba->type == SYS_DEV_SATA_VMD) { host_base = ahci_get_port_count(hba->path, &port_count); - if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) { + if (ahci_enumerate_ports(hba, port_count, host_base, verbose)) { if (verbose > 0) pr_err("failed to enumerate ports on %s controller at 
%s.\n", get_sys_dev_type(hba->type), hba->pci_id); @@ -4174,17 +4164,17 @@ static int imsm_read_serial(int fd, char *devname, memset(buf, 0, sizeof(buf)); + if (check_env("IMSM_DEVNAME_AS_SERIAL")) { + memset(serial, 0, serial_buf_len); + fd2devname(fd, (char *) serial); + return 0; + } + rv = nvme_get_serial(fd, buf, sizeof(buf)); if (rv) rv = scsi_get_serial(fd, buf, sizeof(buf)); - if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) { - memset(serial, 0, MAX_RAID_SERIAL_LEN); - fd2devname(fd, (char *) serial); - return 0; - } - if (rv != 0) { if (devname) pr_err("Failed to retrieve serial for %s\n", @@ -8771,6 +8761,9 @@ static int imsm_set_array_state(struct active_array *a, int consistent) super->updates_pending++; } + if (a->prev_action == idle) + goto skip_mark_checkpoint; + mark_checkpoint: /* skip checkpointing for general migration, * it is controlled in mdadm @@ -11259,39 +11252,247 @@ abort: return retval; } -static char disk_by_path[] = "/dev/disk/by-path/"; - -static const char *imsm_get_disk_controller_domain(const char *path) -{ - char disk_path[PATH_MAX]; - char *drv=NULL; - struct stat st; - - strncpy(disk_path, disk_by_path, PATH_MAX); - strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1); - if (stat(disk_path, &st) == 0) { - struct sys_dev* hba; - char *path; - - path = devt_to_devpath(st.st_rdev, 1, NULL); - if (path == NULL) - return "unknown"; - hba = find_disk_attached_hba(-1, path); - if (hba && hba->type == SYS_DEV_SAS) - drv = "isci"; - else if (hba && (hba->type == SYS_DEV_SATA || hba->type == SYS_DEV_SATA_VMD)) - drv = "ahci"; - else if (hba && hba->type == SYS_DEV_VMD) - drv = "vmd"; - else if (hba && hba->type == SYS_DEV_NVME) - drv = "nvme"; - else - drv = "unknown"; - dprintf("path: %s hba: %s attached: %s\n", - path, (hba) ? hba->path : "NULL", drv); - free(path); +/** + * test_and_add_drive_controller_policy_imsm() - add disk controller to policies list. + * @type: Policy type to search on list. 
+ * @pols: List of currently recorded policies. + * @disk_fd: File descriptor of the device to check. + * @hba: The hba disk is attached, could be NULL if verification is disabled. + * @verbose: verbose flag. + * + * IMSM cares about drive physical placement. If @hba is not set, it adds unknown policy. + * If there is no controller policy on pols we are free to add first one. If there is a policy then, + * new must be the same - no controller mixing allowed. + */ +static mdadm_status_t +test_and_add_drive_controller_policy_imsm(const char * const type, dev_policy_t **pols, int disk_fd, + struct sys_dev *hba, const int verbose) +{ + const char *controller_policy = get_sys_dev_type(SYS_DEV_UNKNOWN); + struct dev_policy *pol = pol_find(*pols, (char *)type); + char devname[MAX_RAID_SERIAL_LEN]; + + if (hba) + controller_policy = get_sys_dev_type(hba->type); + + if (!pol) { + pol_add(pols, (char *)type, (char *)controller_policy, "imsm"); + return MDADM_STATUS_SUCCESS; + } + + if (strcmp(pol->value, controller_policy) == 0) + return MDADM_STATUS_SUCCESS; + + fd2devname(disk_fd, devname); + pr_vrb("Intel(R) raid controller \"%s\" found for %s, but \"%s\" was detected earlier\n", + controller_policy, devname, pol->value); + pr_vrb("Disks under different controllers cannot be used, aborting\n"); + + return MDADM_STATUS_ERROR; +} + +/** + * test_and_add_drive_encryption_policy_imsm() - add disk encryption to policies list. + * @type: policy type to search in the list. + * @pols: list of currently recorded policies. + * @disk_fd: file descriptor of the device to check. + * @hba: The hba to which the drive is attached, could be NULL if verification is disabled. + * @verbose: verbose flag. + * + * IMSM cares about drive encryption state. It is not allowed to mix disks with different + * encryption state within one md device. + * If there is no encryption policy on pols we are free to add first one. + * If there is a policy then, new must be the same. 
+ */ +static mdadm_status_t +test_and_add_drive_encryption_policy_imsm(const char * const type, dev_policy_t **pols, int disk_fd, + struct sys_dev *hba, const int verbose) +{ + struct dev_policy *expected_policy = pol_find(*pols, (char *)type); + struct encryption_information information = {0}; + char *encryption_state = "Unknown"; + int status = MDADM_STATUS_SUCCESS; + bool encryption_checked = true; + char devname[PATH_MAX]; + + if (!hba) + goto check_policy; + + switch (hba->type) { + case SYS_DEV_NVME: + case SYS_DEV_VMD: + status = get_nvme_opal_encryption_information(disk_fd, &information, verbose); + break; + case SYS_DEV_SATA: + case SYS_DEV_SATA_VMD: + status = get_ata_encryption_information(disk_fd, &information, verbose); + break; + default: + encryption_checked = false; + } + + if (status) { + fd2devname(disk_fd, devname); + pr_vrb("Failed to read encryption information of device %s\n", devname); + return MDADM_STATUS_ERROR; + } + + if (encryption_checked) { + if (information.status == ENC_STATUS_LOCKED) { + fd2devname(disk_fd, devname); + pr_vrb("Device %s is in Locked state, cannot use. 
Aborting.\n", devname); + return MDADM_STATUS_ERROR; + } + encryption_state = (char *)get_encryption_status_string(information.status); } - return drv; + +check_policy: + if (expected_policy) { + if (strcmp(expected_policy->value, encryption_state) == 0) + return MDADM_STATUS_SUCCESS; + + fd2devname(disk_fd, devname); + pr_vrb("Encryption status \"%s\" detected for disk %s, but \"%s\" status was detected eariler.\n", + encryption_state, devname, expected_policy->value); + pr_vrb("Disks with different encryption status cannot be used.\n"); + return MDADM_STATUS_ERROR; + } + + pol_add(pols, (char *)type, encryption_state, "imsm"); + + return MDADM_STATUS_SUCCESS; +} + +struct imsm_drive_policy { + char *type; + mdadm_status_t (*test_and_add_drive_policy)(const char * const type, + struct dev_policy **pols, int disk_fd, + struct sys_dev *hba, const int verbose); +}; + +struct imsm_drive_policy imsm_policies[] = { + {"controller", test_and_add_drive_controller_policy_imsm}, + {"encryption", test_and_add_drive_encryption_policy_imsm} +}; + +mdadm_status_t test_and_add_drive_policies_imsm(struct dev_policy **pols, int disk_fd, + const int verbose) +{ + struct imsm_drive_policy *imsm_pol; + struct sys_dev *hba = NULL; + char path[PATH_MAX]; + mdadm_status_t ret; + unsigned int i; + + /* If imsm platform verification is disabled, do not search for hba. 
*/ + if (check_no_platform() != 1) { + if (!diskfd_to_devpath(disk_fd, 1, path)) { + pr_vrb("IMSM: Failed to retrieve device path by file descriptor.\n"); + return MDADM_STATUS_ERROR; + } + + hba = find_disk_attached_hba(disk_fd, path); + if (!hba) { + pr_vrb("IMSM: Failed to find hba for %s\n", path); + return MDADM_STATUS_ERROR; + } + } + + for (i = 0; i < ARRAY_SIZE(imsm_policies); i++) { + imsm_pol = &imsm_policies[i]; + + ret = imsm_pol->test_and_add_drive_policy(imsm_pol->type, pols, disk_fd, hba, + verbose); + if (ret != MDADM_STATUS_SUCCESS) + /* Inherit error code */ + return ret; + } + + return MDADM_STATUS_SUCCESS; +} + +/** + * get_spare_criteria_imsm() - set spare criteria. + * @st: supertype. + * @mddev_path: path to md device devnode, it must be container. + * @c: spare_criteria struct to fill, not NULL. + * + * If superblock is not loaded, use mddev_path to load_container. It must be given in this case. + * Filles size and sector size accordingly to superblock. + */ +mdadm_status_t get_spare_criteria_imsm(struct supertype *st, char *mddev_path, + struct spare_criteria *c) +{ + mdadm_status_t ret = MDADM_STATUS_ERROR; + bool free_superblock = false; + unsigned long long size = 0; + struct intel_super *super; + struct extent *e; + struct dl *dl; + int i; + + /* If no superblock and no mddev_path, we cannot load superblock. 
*/ + assert(st->sb || mddev_path); + + if (mddev_path) { + int fd = open(mddev_path, O_RDONLY); + mdadm_status_t rv; + + if (!is_fd_valid(fd)) + return MDADM_STATUS_ERROR; + + if (!st->sb) { + if (load_container_imsm(st, fd, st->devnm)) { + close(fd); + return MDADM_STATUS_ERROR; + } + free_superblock = true; + } + + rv = mddev_test_and_add_drive_policies(st, &c->pols, fd, 0); + close(fd); + + if (rv != MDADM_STATUS_SUCCESS) + goto out; + } + + super = st->sb; + + /* find first active disk in array */ + dl = super->disks; + while (dl && (is_failed(&dl->disk) || dl->index == -1)) + dl = dl->next; + + if (!dl) + goto out; + + /* find last lba used by subarrays */ + e = get_extents(super, dl, 0); + if (!e) + goto out; + + for (i = 0; e[i].size; i++) + continue; + if (i > 0) + size = e[i - 1].start + e[i - 1].size; + free(e); + + /* add the amount of space needed for metadata */ + size += imsm_min_reserved_sectors(super); + + c->min_size = size * 512; + c->sector_size = super->sector_size; + c->criteria_set = true; + ret = MDADM_STATUS_SUCCESS; + +out: + if (free_superblock) + free_super_imsm(st); + + if (ret != MDADM_STATUS_SUCCESS) + c->criteria_set = false; + + return ret; } static char *imsm_find_array_devnm_by_subdev(int subdev, char *container) @@ -11422,10 +11623,15 @@ static int imsm_reshape_is_allowed_on_container(struct supertype *st, */ static struct mdinfo *get_spares_for_grow(struct supertype *st) { - struct spare_criteria sc; + struct spare_criteria sc = {0}; + struct mdinfo *spares; + + get_spare_criteria_imsm(st, NULL, &sc); + spares = container_choose_spares(st, &sc, NULL, NULL, NULL, 0); + + dev_policy_free(sc.pols); - get_spare_criteria_imsm(st, &sc); - return container_choose_spares(st, &sc, NULL, NULL, NULL, 0); + return spares; } /****************************************************************************** @@ -12987,7 +13193,7 @@ struct superswitch super_imsm = { .update_subarray = update_subarray_imsm, .load_container = load_container_imsm, 
.default_geometry = default_geometry_imsm, - .get_disk_controller_domain = imsm_get_disk_controller_domain, + .test_and_add_drive_policies = test_and_add_drive_policies_imsm, .reshape_super = imsm_reshape_super, .manage_reshape = imsm_manage_reshape, .recover_backup = recover_backup_imsm, @@ -13020,6 +13226,7 @@ struct superswitch super_imsm = { .validate_ppl = validate_ppl_imsm, .external = 1, + .swapuuid = 0, .name = "imsm", /* for mdmon */ diff --git a/super0.c b/super0.c index a7c5f81..9b8a1bd 100644 --- a/super0.c +++ b/super0.c @@ -1369,5 +1369,7 @@ struct superswitch super0 = { .locate_bitmap = locate_bitmap0, .write_bitmap = write_bitmap0, .free_super = free_super0, + + .swapuuid = 0, .name = "0.90", }; diff --git a/super1.c b/super1.c index 871d19f..81d29a6 100644 --- a/super1.c +++ b/super1.c @@ -575,17 +575,6 @@ static void examine_super1(struct supertype *st, char *homehost) inconsistent = 1; } } -#if 0 - /* This is confusing too */ - faulty = 0; - for (i = 0; i < __le32_to_cpu(sb->max_dev); i++) { - int role = __le16_to_cpu(sb->dev_roles[i]); - if (role == MD_DISK_ROLE_FAULTY) - faulty++; - } - if (faulty) - printf(" %d failed", faulty); -#endif printf(" ('A' == active, '.' == missing, 'R' == replacing)"); printf("\n"); for (d = 0; d < __le32_to_cpu(sb->max_dev); d++) { @@ -1752,7 +1741,10 @@ static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk, di->devname = devname; di->disk = *dk; di->data_offset = data_offset; - get_dev_size(fd, NULL, &di->dev_size); + + if (is_fd_valid(fd)) + get_dev_size(fd, NULL, &di->dev_size); + di->next = NULL; *dip = di; diff --git a/sysfs.c b/sysfs.c index f95ef70..20fe1e9 100644 --- a/sysfs.c +++ b/sysfs.c @@ -74,6 +74,29 @@ void sysfs_free(struct mdinfo *sra) } } +/** + * sysfs_get_container_devnm() - extract container device name. + * @mdi: md_info describes member array, with GET_VERSION option. + * @buf: buf to fill, must be MD_NAME_MAX. 
+ * + * External array version is in format {/,-}/ + * Extract container_devnm from it and safe it in @buf. + */ +void sysfs_get_container_devnm(struct mdinfo *mdi, char *buf) +{ + char *p; + + assert(is_subarray(mdi->text_version)); + + /* Skip first special sign */ + snprintf(buf, MD_NAME_MAX, "%s", mdi->text_version + 1); + + /* Remove array index */ + p = strchr(buf, '/'); + if (p) + *p = 0; +} + int sysfs_open(char *devnm, char *devname, char *attr) { char fname[MAX_SYSFS_PATH_LEN]; @@ -655,7 +678,7 @@ int sysfs_set_safemode(struct mdinfo *sra, unsigned long ms) return sysfs_set_str(sra, NULL, "safe_mode_delay", delay); } -int sysfs_set_array(struct mdinfo *info, int vers) +int sysfs_set_array(struct mdinfo *info) { int rv = 0; char ver[100]; @@ -679,9 +702,7 @@ int sysfs_set_array(struct mdinfo *info, int vers) if (strlen(buf) >= 9 && buf[9] == '-') ver[9] = '-'; - if ((vers % 100) < 2 || - sysfs_set_str(info, NULL, "metadata_version", - ver) < 0) { + if (sysfs_set_str(info, NULL, "metadata_version", ver) < 0) { pr_err("This kernel does not support external metadata.\n"); return 1; } @@ -1100,3 +1121,32 @@ void sysfsline(char *line) sr->next = sysfs_rules; sysfs_rules = sr; } + +/** + * sysfs_is_libata_allow_tpm_enabled() - check if libata allow_tmp is enabled. + * @verbose: verbose flag. + * + * Check if libata allow_tmp flag is set, this is required for SATA Opal Security commands to work. + * + * Return: true if allow_tpm enable, false otherwise. + */ +bool sysfs_is_libata_allow_tpm_enabled(const int verbose) +{ + const char *path = "/sys/module/libata/parameters/allow_tpm"; + const char *expected_value = "1"; + int fd = open(path, O_RDONLY); + char buf[3]; + + if (!is_fd_valid(fd)) { + pr_vrb("Failed open file descriptor to %s. 
Cannot check libata allow_tpm param.\n", + path); + return false; + } + + sysfs_fd_get_str(fd, buf, sizeof(buf)); + close(fd); + + if (strncmp(buf, expected_value, 1) == 0) + return true; + return false; +} diff --git a/test b/test index 49a36c3..338c2db 100755 --- a/test +++ b/test @@ -1,11 +1,12 @@ #!/bin/bash # # run test suite for mdadm -mdadm=$PWD/mdadm +mdadm=`which mdadm` targetdir="/var/tmp" logdir="$targetdir" config=/tmp/mdadm.conf testdir=$PWD/tests +system_speed_limit=`cat /proc/sys/dev/raid/speed_limit_max` devlist= savelogs=0 @@ -20,9 +21,6 @@ DEVTYPE=loop INTEGRITY=yes LVM_VOLGROUP=mdtest -# make sure to test local mdmon, not system one -export MDADM_NO_SYSTEMCTL=1 - # assume md0, md1, md2 exist in /dev md0=/dev/md0 md1=/dev/md1 @@ -41,7 +39,10 @@ ctrl_c() { ctrl_c_error=1 } -# mdadm always adds --quiet, and we want to see any unexpected messages +restore_system_speed_limit() { + echo $system_speed_limit > /proc/sys/dev/raid/speed_limit_max +} + mdadm() { rm -f $targetdir/stderr case $* in @@ -63,10 +64,10 @@ mdadm() { $mdadm --zero $args > /dev/null } done - $mdadm 2> $targetdir/stderr --quiet "$@" --auto=yes + $mdadm 2> $targetdir/stderr "$@" --auto=yes ;; * ) - $mdadm 2> $targetdir/stderr --quiet "$@" + $mdadm 2> $targetdir/stderr "$@" ;; esac rv=$? @@ -99,8 +100,6 @@ do_test() { fi rm -f $targetdir/stderr - # this might have been reset: restore the default. - echo 2000 > /proc/sys/dev/raid/speed_limit_max do_clean # source script in a subshell, so it has access to our # namespace, but cannot change it. @@ -122,6 +121,7 @@ do_test() { echo " (KNOWN BROKEN TEST: $_broken_msg)" fi fi + restore_system_speed_limit [ "$savelogs" == "1" ] && mv -f $targetdir/log $logdir/$_basename.log [ "$ctrl_c_error" == "1" ] && exit 1 @@ -299,7 +299,15 @@ parse_args() { done } +print_warning() { + cat <<-EOF + Warning! Tests are performed on system level mdadm! + If you want to test local build, you need to install it first! 
+ EOF +} + main() { + print_warning do_setup echo "Testing on linux-$(uname -r) kernel" @@ -329,6 +337,7 @@ main() { break fi done + exit 0 } diff --git a/tests/func.sh b/tests/func.sh index 1c1a28a..b474442 100644 --- a/tests/func.sh +++ b/tests/func.sh @@ -213,7 +213,6 @@ do_setup() { path1=$dev7 ulimit -c unlimited [ -f /proc/mdstat ] || modprobe md_mod - echo 2000 > /proc/sys/dev/raid/speed_limit_max echo 0 > /sys/module/md_mod/parameters/start_ro } diff --git a/udev.c b/udev.c index bc4722b..066e6ab 100644 --- a/udev.c +++ b/udev.c @@ -26,7 +26,10 @@ #include #include #include + +#ifndef NO_LIBUDEV #include +#endif static char *unblock_path; diff --git a/util.c b/util.c index b145447..9e83704 100644 --- a/util.c +++ b/util.c @@ -36,7 +36,7 @@ #include #include #include - +#include /* * following taken from linux/blkpg.h because they aren't @@ -589,19 +589,21 @@ char *__fname_from_uuid(int id[4], int swap, char *buf, char sep) } -char *fname_from_uuid(struct supertype *st, struct mdinfo *info, - char *buf, char sep) +/** + * fname_from_uuid() - generate uuid string. Should not be used with super1. + * @info: info with uuid + * @buf: buf to fill. + * + * This routine should not be used with super1. See detail_fname_from_uuid() for details. It does + * not use superswitch swapuuid as it should be 0 but it has to do UUID conversion if host is big + * endian- left for backward compatibility. + */ +char *fname_from_uuid(struct mdinfo *info, char *buf) { - // dirty hack to work around an issue with super1 superblocks... 
- // super1 superblocks need swapuuid set in order for assembly to - // work, but can't have it set if we want this printout to match - // all the other uuid printouts in super1.c, so we force swapuuid - // to 1 to make our printout match the rest of super1 #if __BYTE_ORDER == BIG_ENDIAN - return __fname_from_uuid(info->uuid, 1, buf, sep); + return __fname_from_uuid(info->uuid, true, buf, ':'); #else - return __fname_from_uuid(info->uuid, (st->ss == &super1) ? 1 : - st->ss->swapuuid, buf, sep); + return __fname_from_uuid(info->uuid, false, buf, ':'); #endif } @@ -1266,40 +1268,6 @@ struct supertype *super_by_fd(int fd, char **subarrayp) return st; } -int dev_size_from_id(dev_t id, unsigned long long *size) -{ - char buf[20]; - int fd; - - sprintf(buf, "%d:%d", major(id), minor(id)); - fd = dev_open(buf, O_RDONLY); - if (fd < 0) - return 0; - if (get_dev_size(fd, NULL, size)) { - close(fd); - return 1; - } - close(fd); - return 0; -} - -int dev_sector_size_from_id(dev_t id, unsigned int *size) -{ - char buf[20]; - int fd; - - sprintf(buf, "%d:%d", major(id), minor(id)); - fd = dev_open(buf, O_RDONLY); - if (fd < 0) - return 0; - if (get_dev_sector_size(fd, NULL, size)) { - close(fd); - return 1; - } - close(fd); - return 0; -} - struct supertype *dup_super(struct supertype *orig) { struct supertype *st; @@ -1899,8 +1867,7 @@ int set_array_info(int mdfd, struct supertype *st, struct mdinfo *info) int rv; if (st->ss->external) - return sysfs_set_array(info, 9003); - + return sysfs_set_array(info); memset(&inf, 0, sizeof(inf)); inf.major_version = info->array.major_version; inf.minor_version = info->array.minor_version; @@ -2088,6 +2055,65 @@ void append_metadata_update(struct supertype *st, void *buf, int len) unsigned int __invalid_size_argument_for_IOC = 0; #endif +/** + * disk_fd_matches_criteria() - check if device matches spare criteria. + * @st: supertype, not NULL. + * @disk_fd: file descriptor of the disk. + * @sc: criteria to test. 
+ * + * Return: true if disk matches criteria, false otherwise. + */ +bool disk_fd_matches_criteria(struct supertype *st, int disk_fd, struct spare_criteria *sc) +{ + unsigned int dev_sector_size = 0; + unsigned long long dev_size = 0; + + if (!sc->criteria_set) + return true; + + if (!get_dev_size(disk_fd, NULL, &dev_size) || dev_size < sc->min_size) + return false; + + if (!get_dev_sector_size(disk_fd, NULL, &dev_sector_size) || + sc->sector_size != dev_sector_size) + return false; + + if (drive_test_and_add_policies(st, &sc->pols, disk_fd, 0)) + return false; + + return true; +} + +/** + * devid_matches_criteria() - check if device referenced by devid matches spare criteria. + * @st: supertype, not NULL. + * @devid: devid of the device to check. + * @sc: criteria to test. + * + * Return: true if disk matches criteria, false otherwise. + */ +bool devid_matches_criteria(struct supertype *st, dev_t devid, struct spare_criteria *sc) +{ + char buf[NAME_MAX]; + bool ret; + int fd; + + if (!sc->criteria_set) + return true; + + snprintf(buf, NAME_MAX, "%d:%d", major(devid), minor(devid)); + + fd = dev_open(buf, O_RDONLY); + if (!is_fd_valid(fd)) + return false; + + /* Error code inherited */ + ret = disk_fd_matches_criteria(st, fd, sc); + + close(fd); + return ret; +} + /* Pick all spares matching given criteria from a container * if min_size == 0 do not check size * if domlist == NULL do not check domains @@ -2111,28 +2137,13 @@ struct mdinfo *container_choose_spares(struct supertype *st, dp = &disks->devs; disks->array.spare_disks = 0; while (*dp) { - int found = 0; + bool found = false; + d = *dp; if (d->disk.state == 0) { - /* check if size is acceptable */ - unsigned long long dev_size; - unsigned int dev_sector_size; - int size_valid = 0; - int sector_size_valid = 0; - dev_t dev = makedev(d->disk.major,d->disk.minor); - if (!criteria->min_size || - (dev_size_from_id(dev, &dev_size) && - dev_size >= criteria->min_size)) - size_valid = 1; - - if 
(!criteria->sector_size || - (dev_sector_size_from_id(dev, &dev_sector_size) && - criteria->sector_size == dev_sector_size)) - sector_size_valid = 1; - - found = size_valid && sector_size_valid; + found = devid_matches_criteria(st, dev, criteria); /* check if domain matches */ if (found && domlist) { @@ -2141,7 +2152,8 @@ struct mdinfo *container_choose_spares(struct supertype *st, pol_add(&pol, pol_domain, spare_group, NULL); if (domain_test(domlist, pol, metadata) != 1) - found = 0; + found = false; + dev_policy_free(pol); } } -- cgit v1.2.3