diff options
38 files changed, 1370 insertions, 615 deletions
@@ -341,8 +341,10 @@ static int select_devices(struct mddev_dev *devlist, st->ss->free_super(st); dev_policy_free(pol); domain_free(domains); - if (tst) + if (tst) { tst->ss->free_super(tst); + free(tst); + } return -1; } @@ -417,6 +419,7 @@ static int select_devices(struct mddev_dev *devlist, st->ss->free_super(st); dev_policy_free(pol); domain_free(domains); + free(st); return -1; } if (c->verbose > 0) @@ -533,6 +536,7 @@ static int select_devices(struct mddev_dev *devlist, st->ss->free_super(st); dev_policy_free(pol); domain_free(domains); + free(tst); return -1; } tmpdev->used = 1; @@ -546,8 +550,10 @@ static int select_devices(struct mddev_dev *devlist, } dev_policy_free(pol); pol = NULL; - if (tst) + if (tst) { tst->ss->free_super(tst); + free(tst); + } } /* Check if we found some imsm spares but no members */ @@ -839,6 +845,7 @@ static int load_devices(struct devs *devices, char *devmap, close(mdfd); free(devices); free(devmap); + free(best); *stp = st; return -1; } @@ -1950,6 +1957,7 @@ out: } else if (mdfd >= 0) close(mdfd); + free(best); /* '2' means 'OK, but not started yet' */ if (rv == -1) { free(devices); @@ -1982,12 +1990,10 @@ int assemble_container_content(struct supertype *st, int mdfd, return 1; } - if (strcmp(sra->text_version, content->text_version) != 0) { - if (content->array.major_version == -1 && - content->array.minor_version == -2 && - c->readonly && - content->text_version[0] == '/') - content->text_version[0] = '-'; + /* Fill sysfs properties only if they are not set. Determine it by checking text_version + * and ignoring special character on the first place. + */ + if (strcmp(sra->text_version + 1, content->text_version + 1) != 0) { if (sysfs_set_array(content, 9003) != 0) { sysfs_free(sra); return 1; @@ -24,8 +24,8 @@ #include "mdadm.h" -int Build(char *mddev, struct mddev_dev *devlist, - struct shape *s, struct context *c) +int Build(struct mddev_ident *ident, struct mddev_dev *devlist, struct shape *s, + struct context *c) { /* Build a linear or raid0 arrays without superblocks * We cannot really do any checks, we just do it. @@ -75,13 +75,12 @@ int Build(char *mddev, struct mddev_dev *devlist, /* We need to create the device. It can have no name. */ map_lock(&map); - mdfd = create_mddev(mddev, NULL, c->autof, LOCAL, + mdfd = create_mddev(ident->devname, NULL, c->autof, LOCAL, chosen_name, 0); if (mdfd < 0) { map_unlock(&map); return 1; } - mddev = chosen_name; map_update(&map, fd2devnm(mdfd), "none", uuid, chosen_name); map_unlock(&map); @@ -93,7 +92,7 @@ int Build(char *mddev, struct mddev_dev *devlist, array.nr_disks = s->raiddisks; array.raid_disks = s->raiddisks; array.md_minor = 0; - if (fstat_is_blkdev(mdfd, mddev, &rdev)) + if (fstat_is_blkdev(mdfd, chosen_name, &rdev)) array.md_minor = minor(rdev); array.not_persistent = 1; array.state = 0; /* not clean, but no errors */ @@ -108,8 +107,7 @@ int Build(char *mddev, struct mddev_dev *devlist, array.chunk_size = s->chunk*1024; array.layout = s->layout; if (md_set_array_info(mdfd, &array)) { - pr_err("md_set_array_info() failed for %s: %s\n", - mddev, strerror(errno)); + pr_err("md_set_array_info() failed for %s: %s\n", chosen_name, strerror(errno)); goto abort; } @@ -178,8 +176,8 @@ int Build(char *mddev, struct mddev_dev *devlist, } if (bitmap_fd >= 0) { if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) { - pr_err("Cannot set bitmap file for %s: %s\n", - mddev, strerror(errno)); + pr_err("Cannot set bitmap file for %s: %s\n", chosen_name, + strerror(errno)); goto abort; } } @@ -193,9 +191,8 @@ int Build(char *mddev, struct mddev_dev *devlist, } if (c->verbose >= 0) - pr_err("array %s built and started.\n", - mddev); - wait_for(mddev, mdfd); + pr_err("array %s built and started.\n", chosen_name); + wait_for(chosen_name, mdfd); close(mdfd); return 0; @@ -471,11 +471,8 @@ out: return ret; } -int Create(struct supertype *st, char *mddev, - char *name, int *uuid, - int subdevs, struct mddev_dev *devlist, - struct shape *s, - struct context *c) +int Create(struct supertype *st, struct mddev_ident *ident, int subdevs, + struct mddev_dev *devlist, struct shape *s, struct context *c) { /* * Create a new raid array. @@ -497,6 +494,8 @@ int Create(struct supertype *st, char *mddev, unsigned long long minsize = 0, maxsize = 0; char *mindisc = NULL; char *maxdisc = NULL; + char *name = ident->name; + int *uuid = ident->uuid_set == 1 ? ident->uuid : NULL; int dnum; struct mddev_dev *dv; dev_t rdev; @@ -1015,7 +1014,7 @@ int Create(struct supertype *st, char *mddev, /* We need to create the device */ map_lock(&map); - mdfd = create_mddev(mddev, name, c->autof, LOCAL, chosen_name, 1); + mdfd = create_mddev(ident->devname, ident->name, c->autof, LOCAL, chosen_name, 1); if (mdfd < 0) { map_unlock(&map); return 1; @@ -1032,7 +1031,6 @@ int Create(struct supertype *st, char *mddev, udev_unblock(); return 1; } - mddev = chosen_name; memset(&inf, 0, sizeof(inf)); md_get_array_info(mdfd, &inf); @@ -1050,7 +1048,7 @@ int Create(struct supertype *st, char *mddev, * with, but it chooses to trust me instead. Sigh */ info.array.md_minor = 0; - if (fstat_is_blkdev(mdfd, mddev, &rdev)) + if (fstat_is_blkdev(mdfd, chosen_name, &rdev)) info.array.md_minor = minor(rdev); info.array.not_persistent = 0; @@ -1102,8 +1100,8 @@ int Create(struct supertype *st, char *mddev, info.array.layout = s->layout; info.array.chunk_size = s->chunk*1024; - if (name == NULL || *name == 0) { - /* base name on mddev */ + if (*name == 0) { + /* base name on devname */ /* /dev/md0 -> 0 * /dev/md_d0 -> d0 * /dev/md_foo -> foo @@ -1113,15 +1111,16 @@ int Create(struct supertype *st, char *mddev, * /dev/mdhome -> home */ /* FIXME compare this with rules in create_mddev */ - name = strrchr(mddev, '/'); + name = strrchr(chosen_name, '/'); + if (name) { name++; if (strncmp(name, "md_", 3) == 0 && - strlen(name) > 3 && (name-mddev) == 5 /* /dev/ */) + strlen(name) > 3 && (name - chosen_name) == 5 /* /dev/ */) name += 3; else if (strncmp(name, "md", 2) == 0 && strlen(name) > 2 && isdigit(name[2]) && - (name-mddev) == 5 /* /dev/ */) + (name - chosen_name) == 5 /* /dev/ */) name += 2; } } @@ -1215,8 +1214,7 @@ int Create(struct supertype *st, char *mddev, } rv = set_array_info(mdfd, st, &info); if (rv) { - pr_err("failed to set array info for %s: %s\n", - mddev, strerror(errno)); + pr_err("failed to set array info for %s: %s\n", chosen_name, strerror(errno)); goto abort_locked; } @@ -1237,8 +1235,7 @@ int Create(struct supertype *st, char *mddev, goto abort_locked; } if (ioctl(mdfd, SET_BITMAP_FILE, bitmap_fd) < 0) { - pr_err("Cannot set bitmap file for %s: %s\n", - mddev, strerror(errno)); + pr_err("Cannot set bitmap file for %s: %s\n", chosen_name, strerror(errno)); goto abort_locked; } } @@ -1254,7 +1251,7 @@ int Create(struct supertype *st, char *mddev, * create links */ sysfs_uevent(&info, "change"); if (c->verbose >= 0) - pr_err("container %s prepared.\n", mddev); + pr_err("container %s prepared.\n", chosen_name); wait_for(chosen_name, mdfd); } else if (c->runstop == 1 || subdevs >= s->raiddisks) { if (st->ss->external) { @@ -1312,7 +1309,7 @@ int Create(struct supertype *st, char *mddev, ioctl(mdfd, RESTART_ARRAY_RW, NULL); } if (c->verbose >= 0) - pr_info("array %s started.\n", mddev); + pr_info("array %s started.\n", chosen_name); if (st->ss->external && st->container_devnm[0]) { if (need_mdmon) start_mdmon(st->container_devnm); @@ -254,11 +254,9 @@ int Detail(char *dev, struct context *c) fname_from_uuid(st, info, nbuf, ':'); printf("MD_UUID=%s\n", nbuf + 5); mp = map_by_uuid(&map, info->uuid); - if (mp && mp->path && strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0) { - printf("MD_DEVNAME="); - print_escape(mp->path + DEV_MD_DIR_LEN); - putchar('\n'); - } + + if (mp && mp->path && strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0) + printf("MD_DEVNAME=%s\n", mp->path + DEV_MD_DIR_LEN); if (st->ss->export_detail_super) st->ss->export_detail_super(st); @@ -271,12 +269,9 @@ int Detail(char *dev, struct context *c) __fname_from_uuid(mp->uuid, 0, nbuf, ':'); printf("MD_UUID=%s\n", nbuf+5); } - if (mp && mp->path && - strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0) { - printf("MD_DEVNAME="); - print_escape(mp->path + DEV_MD_DIR_LEN); - putchar('\n'); - } + if (mp && mp->path && strncmp(mp->path, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0) + printf("MD_DEVNAME=%s\n", mp->path + DEV_MD_DIR_LEN); + map_free(map); } if (!c->no_devices && sra) { diff --git a/Incremental.c b/Incremental.c index f13ce02..3551c65 100644 --- a/Incremental.c +++ b/Incremental.c @@ -1467,17 +1467,6 @@ static int Incremental_container(struct supertype *st, char *devname, st->ss->getinfo_super(st, &info, NULL); - if ((c->runstop > 0 && info.container_enough >= 0) || - info.container_enough > 0) - /* pass */; - else { - if (c->export) { - printf("MD_STARTED=no\n"); - } else if (c->verbose) - pr_err("not enough devices to start the container\n"); - return 0; - } - match = conf_match(st, &info, devname, c->verbose, &rv); if (match == NULL && rv == 2) return rv; @@ -1628,54 +1617,18 @@ release: return rv; } -static void run_udisks(char *arg1, char *arg2) -{ - int pid = fork(); - int status; - if (pid == 0) { - manage_fork_fds(1); - execl("/usr/bin/udisks", "udisks", arg1, arg2, NULL); - execl("/bin/udisks", "udisks", arg1, arg2, NULL); - exit(1); - } - while (pid > 0 && wait(&status) != pid) - ; -} - -static int force_remove(char *devnm, int fd, struct mdinfo *mdi, int verbose) -{ - int rv; - int devid = devnm2devid(devnm); - - run_udisks("--unmount", map_dev(major(devid), minor(devid), 0)); - rv = Manage_stop(devnm, fd, verbose, 1); - if (rv) { - /* At least we can try to trigger a 'remove' */ - sysfs_uevent(mdi, "remove"); - if (verbose) - pr_err("Fail to stop %s too.\n", devnm); - } - return rv; -} - static void remove_from_member_array(struct mdstat_ent *memb, struct mddev_dev *devlist, int verbose) { - int rv; - struct mdinfo mmdi; int subfd = open_dev(memb->devnm); if (subfd >= 0) { - rv = Manage_subdevs(memb->devnm, subfd, devlist, verbose, - 0, UOPT_UNDEFINED, 0); - if (rv & 2) { - if (sysfs_init(&mmdi, -1, memb->devnm)) - pr_err("unable to initialize sysfs for: %s\n", - memb->devnm); - else - force_remove(memb->devnm, subfd, &mmdi, - verbose); - } + /* + * Ignore the return value because it's necessary + * to handle failure condition here. + */ + Manage_subdevs(memb->devnm, subfd, devlist, verbose, + 0, UOPT_UNDEFINED, 0); close(subfd); } } @@ -1758,21 +1711,19 @@ int IncrementalRemove(char *devname, char *id_path, int verbose) } free_mdstat(mdstat); } else { - rv |= Manage_subdevs(ent->devnm, mdfd, &devlist, - verbose, 0, UOPT_UNDEFINED, 0); - if (rv & 2) { - /* Failed due to EBUSY, try to stop the array. - * Give udisks a chance to unmount it first. + /* + * This 'I' incremental remove is a try-best effort, + * the failure condition can be safely ignored + * because of the following up 'r' remove. */ - rv = force_remove(ent->devnm, mdfd, &mdi, verbose); - goto end; - } + Manage_subdevs(ent->devnm, mdfd, &devlist, + verbose, 0, UOPT_UNDEFINED, 0); } devlist.disposition = 'r'; rv = Manage_subdevs(ent->devnm, mdfd, &devlist, verbose, 0, UOPT_UNDEFINED, 0); -end: + close(mdfd); free_mdstat(ent); return rv; @@ -41,6 +41,7 @@ int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl) * 4 - failed to find a superblock. */ + bool free_super = false; int fd, rv = 0; if (force) @@ -52,8 +53,10 @@ int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl) dev); return 2; } - if (st == NULL) + if (st == NULL) { st = guess_super(fd); + free_super = true; + } if (st == NULL || st->ss->init_super == NULL) { if (verbose >= 0) pr_err("Unrecognised md component device - %s\n", dev); @@ -77,6 +80,10 @@ int Kill(char *dev, struct supertype *st, int force, int verbose, int noexcl) rv = 0; } } + if (free_super && st) { + st->ss->free_super(st); + free(st); + } close(fd); return rv; } @@ -30,7 +30,7 @@ # define "CXFLAGS" to give extra flags to CC. # e.g. make CXFLAGS=-O to optimise -CXFLAGS ?=-O2 +CXFLAGS ?=-O2 -D_FORTIFY_SOURCE=2 TCC = tcc UCLIBC_GCC = $(shell for nm in i386-uclibc-linux-gcc i386-uclibc-gcc; do which $$nm > /dev/null && { echo $$nm ; exit; } ; done; echo false No uclibc found ) #DIET_GCC = diet gcc @@ -50,14 +50,30 @@ ifeq ($(origin CC),default) CC := $(CROSS_COMPILE)gcc endif CXFLAGS ?= -ggdb -CWFLAGS = -Wall -Werror -Wstrict-prototypes -Wextra -Wno-unused-parameter +CWFLAGS ?= -Wall -Werror -Wstrict-prototypes -Wextra -Wno-unused-parameter -Wformat -Wformat-security -Werror=format-security -fstack-protector-strong -fPIE -Warray-bounds ifdef WARN_UNUSED -CWFLAGS += -Wp,-D_FORTIFY_SOURCE=2 -O3 +CWFLAGS += -Wp -O3 endif -FALLTHROUGH := $(shell gcc -v --help 2>&1 | grep "implicit-fallthrough" | wc -l) -ifneq "$(FALLTHROUGH)" "0" -CWFLAGS += -Wimplicit-fallthrough=0 +ifeq ($(origin FALLTHROUGH), undefined) + FALLTHROUGH := $(shell gcc -Q --help=warnings 2>&1 | grep "implicit-fallthrough" | wc -l) + ifneq "$(FALLTHROUGH)" "0" + CWFLAGS += -Wimplicit-fallthrough=0 + endif +endif + +ifeq ($(origin FORMATOVERFLOW), undefined) + FORMATOVERFLOW := $(shell gcc -Q --help=warnings 2>&1 | grep "format-overflow" | wc -l) + ifneq "$(FORMATOVERFLOW)" "0" + CWFLAGS += -Wformat-overflow + endif +endif + +ifeq ($(origin STRINGOPOVERFLOW), undefined) + STRINGOPOVERFLOW := $(shell gcc -Q --help=warnings 2>&1 | grep "stringop-overflow" | wc -l) + ifneq "$(STRINGOPOVERFLOW)" "0" + CWFLAGS += -Wstringop-overflow + endif endif ifdef DEBIAN @@ -116,10 +132,12 @@ CFLAGS += -DUSE_PTHREADS MON_LDFLAGS += -pthread endif +LDFLAGS = -Wl,-z,now,-z,noexecstack + # If you want a static binary, you might uncomment these -# LDFLAGS = -static +# LDFLAGS += -static # STRIP = -s -LDLIBS = -ldl +LDLIBS = -ldl -pie # To explicitly disable libudev, set -DNO_LIBUDEV in CXFLAGS ifeq (, $(findstring -DNO_LIBUDEV, $(CXFLAGS))) @@ -209,14 +227,13 @@ mdadm.Os : $(SRCS) $(INCL) $(CC) -o mdadm.Os $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -DHAVE_STDINT_H -Os $(SRCS) $(LDLIBS) mdadm.O2 : $(SRCS) $(INCL) mdmon.O2 - $(CC) -o mdadm.O2 $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(SRCS) $(LDLIBS) + $(CC) -o mdadm.O2 $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -DHAVE_STDINT_H -O2 $(SRCS) $(LDLIBS) mdmon.O2 : $(MON_SRCS) $(INCL) mdmon.h - $(CC) -o mdmon.O2 $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -DHAVE_STDINT_H -O2 -D_FORTIFY_SOURCE=2 $(MON_SRCS) $(LDLIBS) + $(CC) -o mdmon.O2 $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -DHAVE_STDINT_H -O2 $(MON_SRCS) $(LDLIBS) -# use '-z now' to guarantee no dynamic linker interactions with the monitor thread mdmon : $(MON_OBJS) | check_rundir - $(CC) $(CFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -Wl,-z,now -o mdmon $(MON_OBJS) $(LDLIBS) + $(CC) $(CFLAGS) $(LDFLAGS) $(MON_LDFLAGS) -o mdmon $(MON_OBJS) $(LDLIBS) msg.o: msg.c msg.h test_stripe : restripe.c xmalloc.o mdadm.h @@ -222,6 +222,7 @@ int Manage_stop(char *devname, int fd, int verbose, int will_retry) if (verbose >= 0) pr_err("Cannot get exclusive access to %s:Perhaps a running process, mounted filesystem or active volume group?\n", devname); + sysfs_free(mdi); return 1; } /* If this is an mdmon managed array, just write 'inactive' @@ -703,6 +704,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, struct supertype *dev_st; int j; mdu_disk_info_t disc; + struct map_ent *map = NULL; if (!get_dev_size(tfd, dv->devname, &ldsize)) { if (dv->disposition == 'M') @@ -801,8 +803,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, rdev, update, devname, verbose, array); dev_st->ss->free_super(dev_st); - if (rv) + if (rv) { + free(dev_st); return rv; + } + } + if (dev_st) { + dev_st->ss->free_super(dev_st); + free(dev_st); } } if (dv->disposition == 'M') { @@ -900,6 +908,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, disc.raid_disk = 0; } + if (map_lock(&map)) + pr_err("failed to get exclusive lock on mapfile when add disk\n"); + if (array->not_persistent==0) { int dfd; if (dv->disposition == 'j') @@ -911,9 +922,9 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, dfd = dev_open(dv->devname, O_RDWR | O_EXCL|O_DIRECT); if (tst->ss->add_to_super(tst, &disc, dfd, dv->devname, INVALID_SECTORS)) - return -1; + goto unlock; if (tst->ss->write_init_super(tst)) - return -1; + goto unlock; } else if (dv->disposition == 'A') { /* this had better be raid1. * As we are "--re-add"ing we must find a spare slot @@ -971,14 +982,14 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, pr_err("add failed for %s: could not get exclusive access to container\n", dv->devname); tst->ss->free_super(tst); - return -1; + goto unlock; } /* Check if metadata handler is able to accept the drive */ if (!tst->ss->validate_geometry(tst, LEVEL_CONTAINER, 0, 1, NULL, 0, 0, dv->devname, NULL, 0, 1)) { close(container_fd); - return -1; + goto unlock; } Kill(dv->devname, NULL, 0, -1, 0); @@ -987,7 +998,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, dv->devname, INVALID_SECTORS)) { close(dfd); close(container_fd); - return -1; + goto unlock; } if (!mdmon_running(tst->container_devnm)) tst->ss->sync_metadata(tst); @@ -998,7 +1009,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, dv->devname); close(container_fd); tst->ss->free_super(tst); - return -1; + goto unlock; } sra->array.level = LEVEL_CONTAINER; /* Need to set data_offset and component_size */ @@ -1013,7 +1024,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, pr_err("add new device to external metadata failed for %s\n", dv->devname); close(container_fd); sysfs_free(sra); - return -1; + goto unlock; } ping_monitor(devnm); sysfs_free(sra); @@ -1027,7 +1038,7 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, else pr_err("add new device failed for %s as %d: %s\n", dv->devname, j, strerror(errno)); - return -1; + goto unlock; } if (dv->disposition == 'j') { pr_err("Journal added successfully, making %s read-write\n", devname); @@ -1038,7 +1049,11 @@ int Manage_add(int fd, int tfd, struct mddev_dev *dv, } if (verbose >= 0) pr_err("added %s\n", dv->devname); + map_unlock(&map); return 1; +unlock: + map_unlock(&map); + return -1; } int Manage_remove(struct supertype *tst, int fd, struct mddev_dev *dv, @@ -1362,7 +1377,7 @@ int Manage_subdevs(char *devname, int fd, unsigned long long array_size; struct mddev_dev *dv; int tfd = -1; - struct supertype *tst; + struct supertype *tst = NULL; char *subarray = NULL; int sysfd = -1; int count = 0; /* number of actions taken */ @@ -1699,6 +1714,7 @@ int Manage_subdevs(char *devname, int fd, break; } } + free(tst); if (frozen > 0) sysfs_set_str(&info, NULL, "sync_action","idle"); if (test && count == 0) @@ -1706,6 +1722,7 @@ int Manage_subdevs(char *devname, int fd, return 0; abort: + free(tst); if (frozen > 0) sysfs_set_str(&info, NULL, "sync_action","idle"); return !test && busy ? 2 : 1; @@ -222,11 +222,10 @@ int Monitor(struct mddev_dev *devlist, info.dosyslog = dosyslog; info.test = c->test; - if (gethostname(info.hostname, sizeof(info.hostname)) != 0) { + if (s_gethostname(info.hostname, sizeof(info.hostname)) != 0) { pr_err("Cannot get hostname.\n"); return 1; } - info.hostname[sizeof(info.hostname) - 1] = '\0'; if (share){ if (check_one_sharer(c->scan) == 2) @@ -122,7 +122,7 @@ int match_keyword(char *word) /** * is_devname_ignore() - check if &devname is a special "<ignore>" keyword. */ -bool is_devname_ignore(char *devname) +bool is_devname_ignore(const char *devname) { static const char keyword[] = "<ignore>"; @@ -132,6 +132,34 @@ bool is_devname_ignore(char *devname) } /** + * ident_log() - generate and write message to the user. + * @param_name: name of the property. + * @value: value of the property. + * @reason: meaningful description. + * @cmdline: context dependent actions, see below. + * + * The function is made to provide similar error handling for both config and cmdline. The behavior + * is configurable via @cmdline. Message has following format: + * "Value "@value" cannot be set for @param_name. Reason: @reason." + * + * If cmdline is on: + * - message is written to stderr. + * otherwise: + * - message is written to stdout. + * - "Value ignored" is added at the end of the message. + */ +static void ident_log(const char *param_name, const char *value, const char *reason, + const bool cmdline) +{ + if (cmdline == true) + pr_err("Value \"%s\" cannot be set as %s. Reason: %s.\n", value, param_name, + reason); + else + pr_info("Value \"%s\" cannot be set as %s. Reason: %s. Value ignored.\n", value, + param_name, reason); +} + +/** * ident_init() - Set defaults. * @ident: ident pointer, not NULL. */ @@ -159,6 +187,127 @@ inline void ident_init(struct mddev_ident *ident) ident->uuid_set = 0; } +/** + * _ident_set_devname()- verify devname and set it in &mddev_ident. + * @ident: pointer to &mddev_ident. + * @devname: devname to be set. + * @cmdline: context dependent actions. If set, ignore keyword is not allowed. + * + * @devname can have following forms: + * '<ignore>' keyword (if allowed) + * /dev/md{number} + * /dev/md_d{number} (legacy) + * /dev/md_{name} + * /dev/md/{name} + * {name} + * + * {name} must follow name's criteria and be POSIX compatible. + * If criteria passed, duplicate memory and set devname in @ident. + * + * Return: %MDADM_STATUS_SUCCESS or %MDADM_STATUS_ERROR. + */ +mdadm_status_t _ident_set_devname(struct mddev_ident *ident, const char *devname, + const bool cmdline) +{ + assert(ident); + assert(devname); + + static const char named_dev_pref[] = DEV_NUM_PREF "_"; + static const int named_dev_pref_size = sizeof(named_dev_pref) - 1; + const char *prop_name = "devname"; + const char *name; + + if (ident->devname) { + ident_log(prop_name, devname, "Already defined", cmdline); + return MDADM_STATUS_ERROR; + } + + if (is_devname_ignore(devname) == true) { + if (!cmdline) + goto pass; + + ident_log(prop_name, devname, "Special keyword is invalid in this context", + cmdline); + return MDADM_STATUS_ERROR; + } + + if (is_devname_md_numbered(devname) == true || is_devname_md_d_numbered(devname) == true) + goto pass; + + if (strncmp(devname, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0) + name = devname + DEV_MD_DIR_LEN; + else if (strncmp(devname, named_dev_pref, named_dev_pref_size) == 0) + name = devname + named_dev_pref_size; + else + name = devname; + + if (is_name_posix_compatible(name) == false) { + ident_log(prop_name, name, "Not POSIX compatible", cmdline); + return MDADM_STATUS_ERROR; + } + + if (is_string_lq(name, MD_NAME_MAX + 1) == false) { + ident_log(prop_name, devname, "Invalid length", cmdline); + return MDADM_STATUS_ERROR; + } +pass: + ident->devname = xstrdup(devname); + return MDADM_STATUS_SUCCESS; +} + +/** + * _ident_set_name()- set name in &mddev_ident. + * @ident: pointer to &mddev_ident. + * @name: name to be set. + * @cmdline: context dependent actions. + * + * If criteria passed, set name in @ident. + * + * Return: %MDADM_STATUS_SUCCESS or %MDADM_STATUS_ERROR. + */ +static mdadm_status_t _ident_set_name(struct mddev_ident *ident, const char *name, + const bool cmdline) +{ + assert(name); + assert(ident); + + const char *prop_name = "name"; + + if (ident->name[0]) { + ident_log(prop_name, name, "Already defined", cmdline); + return MDADM_STATUS_ERROR; + } + + if (is_string_lq(name, MD_NAME_MAX + 1) == false) { + ident_log(prop_name, name, "Too long or empty", cmdline); + return MDADM_STATUS_ERROR; + } + + if (is_name_posix_compatible(name) == false) { + ident_log(prop_name, name, "Not POSIX compatible", cmdline); + return MDADM_STATUS_ERROR; + } + + snprintf(ident->name, MD_NAME_MAX + 1, "%s", name); + return MDADM_STATUS_SUCCESS; +} + +/** + * ident_set_devname()- exported, for cmdline. + */ +mdadm_status_t ident_set_devname(struct mddev_ident *ident, const char *name) +{ + return _ident_set_devname(ident, name, true); +} + +/** + * ident_set_name()- exported, for cmdline. + */ +mdadm_status_t ident_set_name(struct mddev_ident *ident, const char *name) +{ + return _ident_set_name(ident, name, true); +} + struct conf_dev { struct conf_dev *next; char *name; @@ -396,29 +545,7 @@ void arrayline(char *line) for (w = dl_next(line); w != line; w = dl_next(w)) { if (w[0] == '/' || strchr(w, '=') == NULL) { - /* This names the device, or is '<ignore>'. - * The rules match those in create_mddev. - * 'w' must be: - * /dev/md/{anything} - * /dev/mdNN - * /dev/md_dNN - * <ignore> - * or anything that doesn't start '/' or '<' - */ - if (is_devname_ignore(w) == true || - strncmp(w, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0 || - (w[0] != '/' && w[0] != '<') || - is_devname_md_numbered(w) == true || - is_devname_md_d_numbered(w) == true) { - /* This is acceptable */; - if (mis.devname) - pr_err("only give one device per ARRAY line: %s and %s\n", - mis.devname, w); - else - mis.devname = w; - }else { - pr_err("%s is an invalid name for an md device - ignored.\n", w); - } + _ident_set_devname(&mis, w, false); } else if (strncasecmp(w, "uuid=", 5) == 0) { if (mis.uuid_set) pr_err("only specify uuid once, %s ignored.\n", @@ -444,14 +571,7 @@ void arrayline(char *line) mis.super_minor = minor; } } else if (strncasecmp(w, "name=", 5) == 0) { - if (mis.name[0]) - pr_err("only specify name once, %s ignored.\n", - w); - else if (strlen(w + 5) > 32) - pr_err("name too long, ignoring %s\n", w); - else - strcpy(mis.name, w + 5); - + _ident_set_name(&mis, w + 5, false); } else if (strncasecmp(w, "bitmap=", 7) == 0) { if (mis.bitmap_file) pr_err("only specify bitmap file once. %s ignored\n", @@ -27,6 +27,24 @@ #include <ctype.h> #include <limits.h> +/** + * is_string_lq() - Check if string length with NULL byte is lower or equal to requested. + * @str: string to check. + * @max_len: max length. + * + * @str length must be bigger than 0 and be lower or equal @max_len, including termination byte. + */ +bool is_string_lq(const char * const str, size_t max_len) +{ + assert(str); + + size_t _len = strnlen(str, max_len); + + if (_len > 0 && _len < max_len) + return true; + return false; +} + bool is_dev_alive(char *path) { if (!path) @@ -465,24 +483,50 @@ void print_quoted(char *str) putchar(q); } -void print_escape(char *str) +/** + * is_alphanum() - Check if sign is letter or digit. + * @c: char to analyze. + * + * Similar to isalnum() but additional locales are excluded. + * + * Return: %true on success, %false otherwise. + */ +bool is_alphanum(const char c) { - /* print str, but change space and tab to '_' - * as is suitable for device names - */ - for (; *str; str++) { - switch (*str) { - case ' ': - case '\t': - putchar('_'); - break; - case '/': - putchar('-'); - break; - default: - putchar(*str); - } + if (isupper(c) || islower(c) || isdigit(c) != 0) + return true; + return false; +} + +/** + * is_name_posix_compatible() - Check if name is POSIX compatible. + * @name: name to check. + * + * POSIX portable file name character set contains ASCII letters, + * digits, '_', '.', and '-'. Also forbid leading '-'. + * The length of the name cannot exceed NAME_MAX - 1 (ensure NULL ending). + * + * Return: %true on success, %false otherwise. + */ +bool is_name_posix_compatible(const char * const name) +{ + assert(name); + + char allowed_symbols[] = "-_."; + const char *n = name; + + if (!is_string_lq(name, NAME_MAX)) + return false; + + if (*n == '-') + return false; + + while (*n != '\0') { + if (!is_alphanum(*n) && !strchr(allowed_symbols, *n)) + return false; + n++; } + return true; } int check_env(char *name) @@ -585,3 +629,22 @@ int parse_num(int *dest, const char *num) *dest = temp; return 0; } + +/** + * s_gethostname() - secure get hostname. Assure null-terminated string. + * + * @buf: buffer for hostname. + * @buf_len: buffer length. + * + * Return: gethostname() result. + */ +int s_gethostname(char *buf, int buf_len) +{ + assert(buf); + + int ret = gethostname(buf, buf_len); + + buf[buf_len - 1] = 0; + + return ret; +} @@ -363,8 +363,7 @@ void RebuildMap(void) char *homehost = conf_get_homehost(&require_homehost); if (homehost == NULL || strcmp(homehost, "<system>")==0) { - if (gethostname(sys_hostname, sizeof(sys_hostname)) == 0) { - sys_hostname[sizeof(sys_hostname)-1] = 0; + if (s_gethostname(sys_hostname, sizeof(sys_hostname)) == 0) { homehost = sys_hostname; } } @@ -364,7 +364,7 @@ Use the Intel(R) Matrix Storage Manager metadata format. This creates a which is managed in a similar manner to DDF, and is supported by an option-rom on some platforms: .IP -.B https://www.intel.com/content/www/us/en/support/products/122484/memory-and-storage/ssd-software/intel-virtual-raid-on-cpu-intel-vroc.html +.B https://www.intel.com/content/www/us/en/support/products/122484 .PP .RE @@ -932,17 +932,14 @@ option will be ignored. .BR \-N ", " \-\-name= Set a .B name -for the array. This is currently only effective when creating an -array with a version-1 superblock, or an array in a DDF container. -The name is a simple textual string that can be used to identify array -components when assembling. If name is needed but not specified, it -is taken from the basename of the device that is being created. -e.g. when creating -.I /dev/md/home -the -.B name -will default to -.IR home . +for the array. It must be +.BR "POSIX PORTABLE NAME" +compatible and cannot be longer than 32 chars. This is effective when creating an array +with a v1 metadata, or an external array. + +If name is needed but not specified, it is taken from the basename of the device +that is being created. See +.BR "DEVICE NAMES" .TP .BR \-R ", " \-\-run @@ -1132,8 +1129,10 @@ is much safer. .TP .BR \-N ", " \-\-name= -Specify the name of the array to assemble. This must be the name -that was specified when creating the array. It must either match +Specify the name of the array to assemble. It must be +.BR "POSIX PORTABLE NAME" +compatible and cannot be longer than 32 chars. This must be the name +that was specified when creating the array. It must either match the name stored in the superblock exactly, or it must match with the current .I homehost @@ -2179,14 +2178,17 @@ Usage: .I md-device .BI \-\-chunk= X .BI \-\-level= Y -.br .BI \-\-raid\-devices= Z .I devices .PP -This usage will initialise a new md array, associate some devices with +This usage will initialize a new md array, associate some devices with it, and activate the array. +.I md-device +is a new device. This could be standard name or chosen name. For details see: +.BR "DEVICE NAMES" + The named device will normally not exist when .I "mdadm \-\-create" is run, but will be created by @@ -2227,24 +2229,6 @@ array. This feature can be overridden with the .B \-\-force option. -When creating an array with version-1 metadata a name for the array is -required. -If this is not given with the -.B \-\-name -option, -.I mdadm -will choose a name based on the last component of the name of the -device being created. So if -.B /dev/md3 -is being created, then the name -.B 3 -will be chosen. -If -.B /dev/md/home -is being created, then the name -.B home -will be used. - When creating a partition based array, using .I mdadm with version-1.x metadata, the partition type should be set to @@ -2429,12 +2413,10 @@ and The .B name -option updates the subarray name in the metadata, it may not affect the -device node name or the device node symlink until the subarray is -re\-assembled. If updating -.B name -would change the UUID of an active subarray this operation is blocked, -and the command will end in an error. +option updates the subarray name in the metadata. It must be +.BR "POSIX PORTABLE NAME" +compatible and cannot be longer than 32 chars. If successes, new value will be respected after +next assembly. The .B ppl @@ -3395,6 +3377,10 @@ When .B \-\-incremental mode is used, this file gets a list of arrays currently being created. +.SH POSIX PORTABLE NAME +A valid name can only consist of characters "A-Za-z0-9.-_". +The name cannot start with a leading "-" and cannot exceed 255 chars. + .SH DEVICE NAMES .I mdadm @@ -3416,6 +3402,10 @@ can be given, or just the suffix of the second sort of name, such as .I home can be given. +In every style, raw name must be compatible with +.BR "POSIX PORTABLE NAME" +and has to be no longer than 32 chars. + When .I mdadm chooses device names during auto-assembly or incremental assembly, it @@ -690,20 +690,14 @@ int main(int argc, char *argv[]) case O(CREATE,'N'): case O(ASSEMBLE,'N'): case O(MISC,'N'): - if (ident.name[0]) { - pr_err("name cannot be set twice. Second value %s.\n", optarg); - exit(2); - } if (mode == MISC && !c.subarray) { pr_err("-N/--name only valid with --update-subarray in misc mode\n"); exit(2); } - if (strlen(optarg) > 32) { - pr_err("name '%s' is too long, 32 chars max.\n", - optarg); + + if (ident_set_name(&ident, optarg) != MDADM_STATUS_SUCCESS) exit(2); - } - strcpy(ident.name, optarg); + continue; case O(ASSEMBLE,'m'): /* super-minor for array */ @@ -1290,37 +1284,33 @@ int main(int argc, char *argv[]) pr_err("an md device must be given in this mode\n"); exit(2); } + if (ident_set_devname(&ident, devlist->devname) != MDADM_STATUS_SUCCESS) + exit(1); + if ((int)ident.super_minor == -2 && c.autof) { pr_err("--super-minor=dev is incompatible with --auto\n"); exit(2); } if (mode == MANAGE || mode == GROW) { - mdfd = open_mddev(devlist->devname, 1); + mdfd = open_mddev(ident.devname, 1); if (mdfd < 0) exit(1); ret = fstat(mdfd, &stb); if (ret) { - pr_err("fstat failed on %s.\n", devlist->devname); + pr_err("fstat failed on %s.\n", ident.devname); exit(1); } } else { - char *bname = basename(devlist->devname); - - if (strlen(bname) > MD_NAME_MAX) { - pr_err("Name %s is too long.\n", devlist->devname); - exit(1); - } - - ret = stat(devlist->devname, &stb); + ret = stat(ident.devname, &stb); if (ident.super_minor == -2 && ret != 0) { pr_err("--super-minor=dev given, and listed device %s doesn't exist.\n", - devlist->devname); + ident.devname); exit(1); } if (!ret && !stat_is_md_dev(&stb)) { - pr_err("device %s exists but is not an md array.\n", devlist->devname); + pr_err("device %s exists but is not an md array.\n", ident.devname); exit(1); } } @@ -1340,8 +1330,7 @@ int main(int argc, char *argv[]) if (c.homehost == NULL && c.require_homehost) c.homehost = conf_get_homehost(&c.require_homehost); if (c.homehost == NULL || strcasecmp(c.homehost, "<system>") == 0) { - if (gethostname(sys_hostname, sizeof(sys_hostname)) == 0) { - sys_hostname[sizeof(sys_hostname)-1] = 0; + if (s_gethostname(sys_hostname, sizeof(sys_hostname)) == 0) { c.homehost = sys_hostname; } } @@ -1409,17 +1398,17 @@ int main(int argc, char *argv[]) case MANAGE: /* readonly, add/remove, readwrite, runstop */ if (c.readonly > 0) - rv = Manage_ro(devlist->devname, mdfd, c.readonly); + rv = Manage_ro(ident.devname, mdfd, c.readonly); if (!rv && devs_found > 1) - rv = Manage_subdevs(devlist->devname, mdfd, + rv = Manage_subdevs(ident.devname, mdfd, devlist->next, c.verbose, c.test, c.update, c.force); if (!rv && c.readonly < 0) - rv = Manage_ro(devlist->devname, mdfd, c.readonly); + rv = Manage_ro(ident.devname, mdfd, c.readonly); if (!rv && c.runstop > 0) - rv = Manage_run(devlist->devname, mdfd, &c); + rv = Manage_run(ident.devname, mdfd, &c); if (!rv && c.runstop < 0) - rv = Manage_stop(devlist->devname, mdfd, c.verbose, 0); + rv = Manage_stop(ident.devname, mdfd, c.verbose, 0); break; case ASSEMBLE: if (!c.scan && c.runstop == -1) { @@ -1429,22 +1418,19 @@ int main(int argc, char *argv[]) ident.super_minor == UnSet && ident.name[0] == 0 && !c.scan) { /* Only a device has been given, so get details from config file */ - struct mddev_ident *array_ident = conf_get_ident(devlist->devname); + struct mddev_ident *array_ident = conf_get_ident(ident.devname); if (array_ident == NULL) { - pr_err("%s not identified in config file.\n", - devlist->devname); + pr_err("%s not identified in config file.\n", ident.devname); rv |= 1; if (mdfd >= 0) close(mdfd); } else { if (array_ident->autof == 0) array_ident->autof = c.autof; - rv |= Assemble(ss, devlist->devname, array_ident, - NULL, &c); + rv |= Assemble(ss, ident.devname, array_ident, NULL, &c); } } else if (!c.scan) - rv = Assemble(ss, devlist->devname, &ident, - devlist->next, &c); + rv = Assemble(ss, ident.devname, &ident, devlist->next, &c); else if (devs_found > 0) { if (c.update && devs_found > 1) { pr_err("can only update a single array at a time\n"); @@ -1502,7 +1488,7 @@ int main(int argc, char *argv[]) break; } } - rv = Build(devlist->devname, devlist->next, &s, &c); + rv = Build(&ident, devlist->next, &s, &c); break; case CREATE: if (c.delay == 0) @@ -1539,9 +1525,7 @@ int main(int argc, char *argv[]) break; } - rv = Create(ss, devlist->devname, - ident.name, ident.uuid_set ? ident.uuid : NULL, - devs_found - 1, devlist->next, &s, &c); + rv = Create(ss, &ident, devs_found - 1, devlist->next, &s, &c); break; case MISC: if (devmode == 'E') { @@ -1638,8 +1622,7 @@ int main(int argc, char *argv[]) break; } for (dv = devlist->next; dv; dv = dv->next) { - rv = Grow_Add_device(devlist->devname, mdfd, - dv->devname); + rv = Grow_Add_device(ident.devname, mdfd, dv->devname); if (rv) break; } @@ -1652,18 +1635,15 @@ int main(int argc, char *argv[]) } if (c.delay == 0) c.delay = DEFAULT_BITMAP_DELAY; - rv = Grow_addbitmap(devlist->devname, mdfd, &c, &s); + rv = Grow_addbitmap(ident.devname, mdfd, &c, &s); } else if (grow_continue) - rv = Grow_continue_command(devlist->devname, - mdfd, c.backup_file, - c.verbose); + rv = Grow_continue_command(ident.devname, mdfd, c.backup_file, c.verbose); else if (s.size > 0 || s.raiddisks || s.layout_str || s.chunk != 0 || s.level != UnSet || s.data_offset != INVALID_SECTORS) { - rv = Grow_reshape(devlist->devname, mdfd, - devlist->next, &c, &s); + rv = Grow_reshape(ident.devname, mdfd, devlist->next, &c, &s); } else if (s.consistency_policy != CONSISTENCY_POLICY_UNKNOWN) { - rv = Grow_consistency_policy(devlist->devname, mdfd, &c, &s); + rv = Grow_consistency_policy(ident.devname, mdfd, &c, &s); } else if (array_size == 0) pr_err("no changes to --grow\n"); break; @@ -1709,6 +1689,10 @@ int main(int argc, char *argv[]) autodetect(); break; } + if (ss) { + ss->ss->free_super(ss); + free(ss); + } if (locked) cluster_release_dlmlock(); close_fd(&mdfd); diff --git a/mdadm.conf.5.in b/mdadm.conf.5.in index bc2295c..94e23dd 100644 --- a/mdadm.conf.5.in +++ b/mdadm.conf.5.in @@ -717,10 +717,6 @@ ARRAY /dev/md/home UUID=9187a482:5dde19d9:eea3cc4a:d646ab8b .br auto=part .br -# The name of this array contains a space. -.br -ARRAY /dev/md9 name='Data Storage' -.sp POLICY domain=domain1 metadata=imsm path=pci-0000:00:1f.2-scsi-* .br action=spare @@ -294,6 +294,11 @@ static inline void __put_unaligned32(__u32 val, void *p) #define KIB_TO_BYTES(x) ((x) << 10) #define SEC_TO_BYTES(x) ((x) << 9) +/** + * This is true for native and DDF, IMSM allows 16. + */ +#define MD_NAME_MAX 32 + extern const char Name[]; struct md_bb_entry { @@ -372,9 +377,6 @@ struct mdinfo { int container_member; /* for assembling external-metatdata arrays * This is to be used internally by metadata * handler only */ - int container_enough; /* flag external handlers can set to - * indicate that subarrays have not enough (-1), - * enough to start (0), or all expected disks (1) */ char sys_name[32]; struct mdinfo *devs; struct mdinfo *next; @@ -425,6 +427,12 @@ struct spare_criteria { unsigned int sector_size; }; +typedef enum mdadm_status { + MDADM_STATUS_SUCCESS = 0, + MDADM_STATUS_ERROR, + MDADM_STATUS_UNDEF, +} mdadm_status_t; + enum mode { ASSEMBLE=1, BUILD, @@ -593,7 +601,7 @@ struct mddev_ident { int uuid_set; int uuid[4]; - char name[33]; + char name[MD_NAME_MAX + 1]; int super_minor; @@ -1531,14 +1539,11 @@ extern int Assemble(struct supertype *st, char *mddev, struct mddev_dev *devlist, struct context *c); -extern int Build(char *mddev, struct mddev_dev *devlist, - struct shape *s, struct context *c); +extern int Build(struct mddev_ident *ident, struct mddev_dev *devlist, struct shape *s, + struct context *c); -extern int Create(struct supertype *st, char *mddev, - char *name, int *uuid, - int subdevs, struct mddev_dev *devlist, - struct shape *s, - struct context *c); +extern int Create(struct supertype *st, struct mddev_ident *ident, int subdevs, + struct mddev_dev *devlist, struct shape *s, struct context *c); extern int Detail(char *dev, struct context *c); extern int Detail_Platform(struct superswitch *ss, int scan, int verbose, int export, char *controller_path); @@ -1609,9 +1614,11 @@ extern int check_raid(int fd, char *name); extern int check_partitions(int fd, char *dname, unsigned long long freesize, unsigned long long size); +extern bool is_name_posix_compatible(const char *path); extern int fstat_is_blkdev(int fd, char *devname, dev_t *rdev); extern int stat_is_blkdev(char *devname, dev_t *rdev); +extern bool is_string_lq(const char * const str, size_t max_len); extern bool is_dev_alive(char *path); extern int get_mdp_major(void); extern int get_maj_min(char *dev, int *major, int *minor); @@ -1629,6 +1636,8 @@ extern void manage_fork_fds(int close_all); extern int continue_via_systemd(char *devnm, char *service_name, char *prefix); extern void ident_init(struct mddev_ident *ident); +extern mdadm_status_t ident_set_devname(struct mddev_ident *ident, const char *devname); +extern mdadm_status_t ident_set_name(struct mddev_ident *ident, const char *name); extern int parse_auto(char *str, char *msg, int config); extern struct mddev_ident *conf_get_ident(char *dev); @@ -1646,11 +1655,10 @@ extern int conf_get_monitor_delay(void); extern char *conf_line(FILE *file); extern char *conf_word(FILE *file, int allow_key); extern void print_quoted(char *str); -extern void print_escape(char *str); extern int use_udev(void); extern unsigned long GCD(unsigned long a, unsigned long b); extern int conf_name_is_free(char *name); -extern bool is_devname_ignore(char *devname); +extern bool is_devname_ignore(const char *devname); extern bool is_devname_md_numbered(const char *devname); extern bool is_devname_md_d_numbered(const char *devname); extern int conf_verify_devnames(struct mddev_ident *array_list); @@ -1805,6 +1813,7 @@ extern void set_dlm_hooks(void); extern void sleep_for(unsigned int sec, long nsec, bool wake_after_interrupt); extern bool is_directory(const char *path); extern bool is_file(const char *path); +extern int s_gethostname(char *buf, int buf_len); #define _ROUND_UP(val, base) (((val) + (base) - 1) & ~(base - 1)) #define ROUND_UP(val, base) _ROUND_UP(val, (typeof(val))(base)) @@ -2005,11 +2014,6 @@ enum r0layout { #define VARIABLE_OFFSET 3 /** - * This is true for native and DDF, IMSM allows 16. - */ -#define MD_NAME_MAX 32 - -/** * is_container() - check if @level is &LEVEL_CONTAINER * @level: level value * @@ -240,7 +240,7 @@ static int make_control_sock(char *devname) return -1; addr.sun_family = PF_LOCAL; - strcpy(addr.sun_path, path); + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", path); umask(077); /* ensure no world write access */ if (bind(sfd, (struct sockaddr*)&addr, sizeof(addr)) < 0) { close(sfd); @@ -389,7 +389,7 @@ int main(int argc, char *argv[]) if (all) { struct mdstat_ent *mdstat, *e; - int container_len = strlen(container_name); + int container_len = strnlen(container_name, MD_NAME_MAX); /* launch an mdmon instance for each container found */ mdstat = mdstat_read(0, 0); @@ -472,7 +472,7 @@ static int mdmon(char *devnm, int must_fork, int takeover) pfd[0] = pfd[1] = -1; container = xcalloc(1, sizeof(*container)); - strcpy(container->devnm, devnm); + snprintf(container->devnm, MD_NAME_MAX, "%s", devnm); container->arrays = NULL; container->sock = -1; @@ -193,14 +193,14 @@ int create_mddev(char *dev, char *name, int autof, int trustworthy, if (dev) { if (strncmp(dev, DEV_MD_DIR, DEV_MD_DIR_LEN) == 0) { - strcpy(cname, dev + DEV_MD_DIR_LEN); + snprintf(cname, MD_NAME_MAX, "%s", dev + DEV_MD_DIR_LEN); } else if (strncmp(dev, "/dev/", 5) == 0) { char *e = dev + strlen(dev); while (e > dev && isdigit(e[-1])) e--; if (e[0]) num = strtoul(e, NULL, 10); - strcpy(cname, dev+5); + snprintf(cname, MD_NAME_MAX, "%s", dev + 5); cname[e-(dev+5)] = 0; /* name *must* be mdXX or md_dXX in this context */ if (num < 0 || diff --git a/misc/mdcheck b/misc/mdcheck index 700c3e2..f87999d 100644 --- a/misc/mdcheck +++ b/misc/mdcheck @@ -140,7 +140,13 @@ do echo $a > $fl any=yes done - if [ -z "$any" ]; then exit 0; fi + # mdcheck_continue.timer is started by mdcheck_start.timer. + # When the check action can be finished in mdcheck_start.service, + # it doesn't need mdcheck_continue anymore. + if [ -z "$any" ]; then + systemctl stop mdcheck_continue.timer + exit 0; + fi sleep 120 done diff --git a/platform-intel.c b/platform-intel.c index 914164c..ac282bc 100644 --- a/platform-intel.c +++ b/platform-intel.c @@ -214,7 +214,7 @@ struct sys_dev *device_by_id_and_path(__u16 device_id, const char *path) static int devpath_to_ll(const char *dev_path, const char *entry, unsigned long long *val) { - char path[strlen(dev_path) + strlen(entry) + 2]; + char path[strnlen(dev_path, PATH_MAX) + strnlen(entry, PATH_MAX) + 2]; int fd; int n; @@ -510,9 +510,6 @@ static const struct imsm_orom *find_imsm_hba_orom(struct sys_dev *hba) return get_orom_by_device_id(hba->dev_id); } -#define GUID_STR_MAX 37 /* according to GUID format: - * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" */ - #define EFI_GUID(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ ((struct efi_guid) \ {{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ @@ -703,6 +700,106 @@ const struct imsm_orom *find_imsm_nvme(struct sys_dev *hba) return &nvme_orom->orom; } +#define VMD_REGISTER_OFFSET 0x3FC +#define VMD_REGISTER_SKU_SHIFT 1 +#define VMD_REGISTER_SKU_MASK (0x00000007) +#define VMD_REGISTER_SKU_PREMIUM 2 +#define MD_REGISTER_VER_MAJOR_SHIFT 4 +#define MD_REGISTER_VER_MAJOR_MASK (0x0000000F) +#define MD_REGISTER_VER_MINOR_SHIFT 8 +#define MD_REGISTER_VER_MINOR_MASK (0x0000000F) + +/* + * read_vmd_register() - Reads VMD register and writes contents to buff ptr + * @buff: buffer for vmd register data, should be the size of uint32_t + * + * Return: 0 on success, 1 on error + */ +int read_vmd_register(uint32_t *buff, struct sys_dev *hba) +{ + int fd; + char vmd_pci_config_path[PATH_MAX]; + + if (!vmd_domain_to_controller(hba, vmd_pci_config_path)) + return 1; + + strncat(vmd_pci_config_path, "/config", PATH_MAX - strnlen(vmd_pci_config_path, PATH_MAX)); + + fd = open(vmd_pci_config_path, O_RDONLY); + if (fd < 0) + return 1; + + if (pread(fd, buff, sizeof(uint32_t), VMD_REGISTER_OFFSET) != sizeof(uint32_t)) { + close(fd); + return 1; + } + close(fd); + return 0; +} + +/* + * add_vmd_orom() - Adds VMD orom cap to orom list, writes orom_entry ptr into vmd_orom + * @vmd_orom: pointer to orom entry pointer + * + * Return: 0 on success, 1 on error + */ +int add_vmd_orom(struct orom_entry **vmd_orom, struct sys_dev *hba) +{ + uint8_t sku; + uint32_t vmd_register_data; + struct imsm_orom vmd_orom_cap = { + .signature = IMSM_VMD_OROM_COMPAT_SIGNATURE, + .sss = IMSM_OROM_SSS_4kB | IMSM_OROM_SSS_8kB | + IMSM_OROM_SSS_16kB | IMSM_OROM_SSS_32kB | + IMSM_OROM_SSS_64kB | IMSM_OROM_SSS_128kB, + .dpa = IMSM_OROM_DISKS_PER_ARRAY_NVME, + .tds = IMSM_OROM_TOTAL_DISKS_VMD, + .vpa = IMSM_OROM_VOLUMES_PER_ARRAY, + .vphba = IMSM_OROM_VOLUMES_PER_HBA_VMD, + .attr = IMSM_OROM_ATTR_2TB | IMSM_OROM_ATTR_2TB_DISK, + .driver_features = IMSM_OROM_CAPABILITIES_EnterpriseSystem | + IMSM_OROM_CAPABILITIES_TPV + }; + + if (read_vmd_register(&vmd_register_data, hba) != 0) + return 1; + + sku = (uint8_t)((vmd_register_data >> VMD_REGISTER_SKU_SHIFT) & + VMD_REGISTER_SKU_MASK); + + if (sku == VMD_REGISTER_SKU_PREMIUM) + vmd_orom_cap.rlc = IMSM_OROM_RLC_RAID0 | IMSM_OROM_RLC_RAID1 | + IMSM_OROM_RLC_RAID10 | IMSM_OROM_RLC_RAID5; + else + vmd_orom_cap.rlc = IMSM_OROM_RLC_RAID_CNG; + + vmd_orom_cap.major_ver = (uint8_t) + ((vmd_register_data >> MD_REGISTER_VER_MAJOR_SHIFT) & + MD_REGISTER_VER_MAJOR_MASK); + vmd_orom_cap.minor_ver = (uint8_t) + ((vmd_register_data >> MD_REGISTER_VER_MINOR_SHIFT) & + MD_REGISTER_VER_MINOR_MASK); + + *vmd_orom = add_orom(&vmd_orom_cap); + + return 0; +} + +const struct imsm_orom *find_imsm_vmd(struct sys_dev *hba) +{ + static struct orom_entry *vmd_orom; + + if (hba->type != SYS_DEV_VMD) + return NULL; + + if (!vmd_orom && add_vmd_orom(&vmd_orom, hba) != 0) + return NULL; + + add_orom_device_id(vmd_orom, hba->dev_id); + vmd_orom->type = SYS_DEV_VMD; + return &vmd_orom->orom; +} + const struct imsm_orom *find_imsm_capability(struct sys_dev *hba) { const struct imsm_orom *cap = get_orom_by_device_id(hba->dev_id); @@ -712,9 +809,19 @@ const struct imsm_orom *find_imsm_capability(struct sys_dev *hba) if (hba->type == SYS_DEV_NVME) return find_imsm_nvme(hba); - if ((cap = find_imsm_efi(hba)) != NULL) + + cap = find_imsm_efi(hba); + if (cap) return cap; - if ((cap = find_imsm_hba_orom(hba)) != NULL) + + if (hba->type == SYS_DEV_VMD) { + cap = find_imsm_vmd(hba); + if (cap) + return cap; + } + + cap = find_imsm_hba_orom(hba); + if (cap) return cap; return NULL; diff --git a/platform-intel.h b/platform-intel.h index 2c0f4e3..ce29d3d 100644 --- a/platform-intel.h +++ b/platform-intel.h @@ -19,11 +19,15 @@ #include <asm/types.h> #include <strings.h> +/* according to GUID format: "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" */ +#define GUID_STR_MAX 37 + /* The IMSM Capability (IMSM AHCI and ISCU OROM/EFI variable) Version Table definition */ struct imsm_orom { __u8 signature[4]; #define IMSM_OROM_SIGNATURE "$VER" #define IMSM_NVME_OROM_COMPAT_SIGNATURE "$NVM" + #define IMSM_VMD_OROM_COMPAT_SIGNATURE "$VMD" __u8 table_ver_major; /* Currently 2 (can change with future revs) */ __u8 table_ver_minor; /* Currently 2 (can change with future revs) */ __u16 major_ver; /* Example: 8 as in 8.6.0.1020 */ @@ -65,11 +69,13 @@ struct imsm_orom { __u16 tds; /* Total Disks Supported */ #define IMSM_OROM_TOTAL_DISKS 6 #define IMSM_OROM_TOTAL_DISKS_NVME 12 + #define IMSM_OROM_TOTAL_DISKS_VMD 48 __u8 vpa; /* # Volumes Per Array supported */ #define IMSM_OROM_VOLUMES_PER_ARRAY 2 __u8 vphba; /* # Volumes Per Host Bus Adapter supported */ #define IMSM_OROM_VOLUMES_PER_HBA 4 #define IMSM_OROM_VOLUMES_PER_HBA_NVME 4 + #define IMSM_OROM_VOLUMES_PER_HBA_VMD 24 /* Attributes supported. This should map to the * attributes in the MPB. Also, lower 16 bits * should match/duplicate RLC bits above. @@ -182,7 +188,13 @@ static inline int imsm_orom_is_enterprise(const struct imsm_orom *orom) static inline int imsm_orom_is_nvme(const struct imsm_orom *orom) { return memcmp(orom->signature, IMSM_NVME_OROM_COMPAT_SIGNATURE, - sizeof(orom->signature)) == 0; + sizeof(orom->signature)) == 0; +} + +static inline int imsm_orom_is_vmd_without_efi(const struct imsm_orom *orom) +{ + return memcmp(orom->signature, IMSM_VMD_OROM_COMPAT_SIGNATURE, + sizeof(orom->signature)) == 0; } static inline int imsm_orom_has_tpv_support(const struct imsm_orom *orom) @@ -229,7 +241,7 @@ extern struct orom_entry *orom_entries; static inline char *guid_str(char *buf, struct efi_guid guid) { - sprintf(buf, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + snprintf(buf, GUID_STR_MAX, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", guid.b[3], guid.b[2], guid.b[1], guid.b[0], guid.b[5], guid.b[4], guid.b[7], guid.b[6], guid.b[8], guid.b[9], guid.b[10], guid.b[11], diff --git a/super-ddf.c b/super-ddf.c index 7213284..a87e316 100644 --- a/super-ddf.c +++ b/super-ddf.c @@ -1975,7 +1975,6 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *m info->array.ctime = DECADE + __be32_to_cpu(*cptr); info->array.chunk_size = 0; - info->container_enough = 1; info->disk.major = 0; info->disk.minor = 0; @@ -1984,12 +1983,14 @@ static void getinfo_super_ddf(struct supertype *st, struct mdinfo *info, char *m info->disk.number = be32_to_cpu(ddf->dlist->disk.refnum); info->disk.raid_disk = find_phys(ddf, ddf->dlist->disk.refnum); + if (info->disk.raid_disk < 0) + return; + info->data_offset = be64_to_cpu(ddf->phys-> entries[info->disk.raid_disk]. config_size); info->component_size = ddf->dlist->size - info->data_offset; - if (info->disk.raid_disk >= 0) - pde = ddf->phys->entries + info->disk.raid_disk; + pde = ddf->phys->entries + info->disk.raid_disk; if (pde && !(be16_to_cpu(pde->state) & DDF_Failed) && !(be16_to_cpu(pde->state) & DDF_Missing)) @@ -2364,8 +2365,7 @@ static int init_super_ddf(struct supertype *st, * Remaining 16 are serial number.... maybe a hostname would do? */ memcpy(ddf->controller.guid, T10, sizeof(T10)); - gethostname(hostname, sizeof(hostname)); - hostname[sizeof(hostname) - 1] = 0; + s_gethostname(hostname, sizeof(hostname)); hostlen = strlen(hostname); memcpy(ddf->controller.guid + 24 - hostlen, hostname, hostlen); for (i = strlen(T10) ; i+hostlen < 24; i++) diff --git a/super-intel.c b/super-intel.c index ae0f4a8..6bdd5c4 100644 --- a/super-intel.c +++ b/super-intel.c @@ -499,8 +499,15 @@ struct intel_disk { struct intel_disk *next; }; +/** + * struct extent - reserved space details. + * @start: start offset. + * @size: size of reservation, set to 0 for metadata reservation. + * @vol: index of the volume, meaningful if &size is set. + */ struct extent { unsigned long long start, size; + int vol; }; /* definitions of reshape process types */ @@ -650,6 +657,11 @@ static int check_no_platform(void) char *l = conf_line(fp); char *w = l; + if (l == NULL) { + fclose(fp); + return 0; + } + do { if (strcmp(w, search) == 0) no_platform = 1; @@ -1534,9 +1546,10 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl, int get_minimal_reservation) { /* find a list of used extents on the given physical device */ - struct extent *rv, *e; - int i; int memberships = count_memberships(dl, super); + struct extent *rv = xcalloc(memberships + 1, sizeof(struct extent)); + struct extent *e = rv; + int i; __u32 reservation; /* trim the reserved area for spares, so they can join any array @@ -1548,9 +1561,6 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl, else reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS; - rv = xcalloc(sizeof(struct extent), (memberships + 1)); - e = rv; - for (i = 0; i < super->anchor->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); struct imsm_map *map = get_imsm_map(dev, MAP_0); @@ -1558,6 +1568,7 @@ static struct extent *get_extents(struct intel_super *super, struct dl *dl, if (get_imsm_disk_slot(map, dl->index) >= 0) { e->start = pba_of_lba0(map); e->size = per_dev_array_size(map); + e->vol = i; e++; } } @@ -1639,17 +1650,29 @@ static int is_journal(struct imsm_disk *disk) return (disk->status & JOURNAL_DISK) == JOURNAL_DISK; } -/* round array size down to closest MB and ensure it splits evenly - * between members +/** + * round_member_size_to_mb()- Round given size to closest MiB. + * @size: size to round in sectors. */ -static unsigned long long round_size_to_mb(unsigned long long size, unsigned int - disk_count) +static inline unsigned long long round_member_size_to_mb(unsigned long long size) { - size /= disk_count; - size = (size >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT; - size *= disk_count; + return (size >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT; +} - return size; +/** + * round_size_to_mb()- Round given size. + * @array_size: size to round in sectors. + * @disk_count: count of data members. + * + * Get size per each data member and round it to closest MiB to ensure that data + * splits evenly between members. + * + * Return: Array size, rounded down. + */ +static inline unsigned long long round_size_to_mb(unsigned long long array_size, + unsigned int disk_count) +{ + return round_member_size_to_mb(array_size / disk_count) * disk_count; } static int able_to_resync(int raid_level, int missing_disks) @@ -2649,9 +2672,14 @@ static void print_imsm_capability(const struct imsm_orom *orom) else printf("Rapid Storage Technology%s\n", imsm_orom_is_enterprise(orom) ? " enterprise" : ""); - if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build) - printf(" Version : %d.%d.%d.%d\n", orom->major_ver, - orom->minor_ver, orom->hotfix_ver, orom->build); + if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build) { + if (imsm_orom_is_vmd_without_efi(orom)) + printf(" Version : %d.%d\n", orom->major_ver, + orom->minor_ver); + else + printf(" Version : %d.%d.%d.%d\n", orom->major_ver, + orom->minor_ver, orom->hotfix_ver, orom->build); + } printf(" RAID Levels :%s%s%s%s%s\n", imsm_orom_has_raid0(orom) ? " raid0" : "", imsm_orom_has_raid1(orom) ? " raid1" : "", @@ -3778,7 +3806,6 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * struct intel_super *super = st->sb; struct imsm_disk *disk; int map_disks = info->array.raid_disks; - int max_enough = -1; int i; struct imsm_super *mpb; @@ -3820,12 +3847,9 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); - int failed, enough, j, missing = 0; + int j = 0; struct imsm_map *map; - __u8 state; - failed = imsm_count_failed(super, dev, MAP_0); - state = imsm_check_degraded(super, dev, failed, MAP_0); map = get_imsm_map(dev, MAP_0); /* any newly missing disks? @@ -3840,36 +3864,10 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char * if (!(ord & IMSM_ORD_REBUILD) && get_imsm_missing(super, idx)) { - missing = 1; break; } } - - if (state == IMSM_T_STATE_FAILED) - enough = -1; - else if (state == IMSM_T_STATE_DEGRADED && - (state != map->map_state || missing)) - enough = 0; - else /* we're normal, or already degraded */ - enough = 1; - if (is_gen_migration(dev) && missing) { - /* during general migration we need all disks - * that process is running on. - * No new missing disk is allowed. - */ - max_enough = -1; - enough = -1; - /* no more checks necessary - */ - break; - } - /* in the missing/failed disk case check to see - * if at least one array is runnable - */ - max_enough = max(max_enough, enough); } - dprintf("enough: %d\n", max_enough); - info->container_enough = max_enough; if (super->disks) { __u32 reserved = imsm_reserved_sectors(super, super->disks); @@ -5533,40 +5531,37 @@ static void imsm_update_version_info(struct intel_super *super) } } -static int check_name(struct intel_super *super, char *name, int quiet) +/** + * imsm_check_name() - check imsm naming criteria. + * @super: &intel_super pointer, not NULL. + * @name: name to check. + * @verbose: verbose level. + * + * Name must be no longer than &MAX_RAID_SERIAL_LEN and must be unique across volumes. + * + * Returns: &true if @name matches, &false otherwise. + */ +static bool imsm_is_name_allowed(struct intel_super *super, const char * const name, + const int verbose) { struct imsm_super *mpb = super->anchor; - char *reason = NULL; - char *start = name; - size_t len = strlen(name); int i; - if (len > 0) { - while (isspace(start[len - 1])) - start[--len] = 0; - while (*start && isspace(*start)) - ++start, --len; - memmove(name, start, len + 1); + if (is_string_lq(name, MAX_RAID_SERIAL_LEN + 1) == false) { + pr_vrb("imsm: Name \"%s\" is too long\n", name); + return false; } - if (len > MAX_RAID_SERIAL_LEN) - reason = "must be 16 characters or less"; - else if (len == 0) - reason = "must be a non-empty string"; - for (i = 0; i < mpb->num_raid_devs; i++) { struct imsm_dev *dev = get_imsm_dev(super, i); if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) { - reason = "already exists"; - break; + pr_vrb("imsm: Name \"%s\" already exists\n", name); + return false; } } - if (reason && !quiet) - pr_err("imsm volume name %s\n", reason); - - return !reason; + return true; } static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, @@ -5661,8 +5656,9 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info, } } - if (!check_name(super, name, 0)) + if (imsm_is_name_allowed(super, name, 1) == false) return 0; + dv = xmalloc(sizeof(*dv)); dev = xcalloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1)); /* @@ -6877,20 +6873,35 @@ static unsigned long long find_size(struct extent *e, int *idx, int num_extents) return end - base_start; } -static unsigned long long merge_extents(struct intel_super *super, int sum_extents) +/** merge_extents() - analyze extents and get free size. + * @super: Intel metadata, not NULL. + * @expanding: if set, we are expanding &super->current_vol. + * + * Build a composite disk with all known extents and generate a size given the + * "all disks in an array must share a common start offset" constraint. + * If a volume is expanded, then return free space after the volume. + * + * Return: Free space or 0 on failure. + */ +static unsigned long long merge_extents(struct intel_super *super, const bool expanding) { - /* build a composite disk with all known extents and generate a new - * 'maxsize' given the "all disks in an array must share a common start - * offset" constraint - */ - struct extent *e = xcalloc(sum_extents, sizeof(*e)); + struct extent *e; struct dl *dl; - int i, j; - int start_extent; - unsigned long long pos; + int i, j, pos_vol_idx = -1; + int extent_idx = 0; + int sum_extents = 0; + unsigned long long pos = 0; unsigned long long start = 0; - unsigned long long maxsize; - unsigned long reserve; + unsigned long long free_size = 0; + + unsigned long pre_reservation = 0; + unsigned long post_reservation = IMSM_RESERVED_SECTORS; + unsigned long reservation_size; + + for (dl = super->disks; dl; dl = dl->next) + if (dl->e) + sum_extents += dl->extent_cnt; + e = xcalloc(sum_extents, sizeof(struct extent)); /* coalesce and sort all extents. also, check to see if we need to * reserve space between member arrays @@ -6909,50 +6920,57 @@ static unsigned long long merge_extents(struct intel_super *super, int sum_exten j = 0; while (i < sum_extents) { e[j].start = e[i].start; + e[j].vol = e[i].vol; e[j].size = find_size(e, &i, sum_extents); j++; if (e[j-1].size == 0) break; } - pos = 0; - maxsize = 0; - start_extent = 0; i = 0; do { - unsigned long long esize; + unsigned long long esize = e[i].start - pos; - esize = e[i].start - pos; - if (esize >= maxsize) { - maxsize = esize; + if (expanding ? pos_vol_idx == super->current_vol : esize >= free_size) { + free_size = esize; start = pos; - start_extent = i; + extent_idx = i; } + pos = e[i].start + e[i].size; + pos_vol_idx = e[i].vol; + i++; } while (e[i-1].size); - free(e); - if (maxsize == 0) + if (free_size == 0) { + dprintf("imsm: Cannot find free size.\n"); + free(e); return 0; + } - /* FIXME assumes volume at offset 0 is the first volume in a - * container - */ - if (start_extent > 0) - reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */ - else - reserve = 0; + if (!expanding && extent_idx != 0) + /* + * Not a real first volume in a container is created, pre_reservation is needed. + */ + pre_reservation = IMSM_RESERVED_SECTORS; - if (maxsize < reserve) - return 0; + if (e[extent_idx].size == 0) + /* + * extent_idx points to the metadata, post_reservation is allready done. + */ + post_reservation = 0; + free(e); - super->create_offset = ~((unsigned long long) 0); - if (start + reserve > super->create_offset) - return 0; /* start overflows create_offset */ - super->create_offset = start + reserve; + reservation_size = pre_reservation + post_reservation; + + if (free_size < reservation_size) { + dprintf("imsm: Reservation size is greater than free space.\n"); + return 0; + } - return maxsize - reserve; + super->create_offset = start + pre_reservation; + return free_size - reservation_size; } static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks) @@ -6998,7 +7016,7 @@ active_arrays_by_format(char *name, char* hba, struct md_list **devlist, int fd = -1; while (dev && !is_fd_valid(fd)) { char *path = xmalloc(strlen(dev->name) + strlen("/dev/") + 1); - num = sprintf(path, "%s%s", "/dev/", dev->name); + num = snprintf(path, PATH_MAX, "%s%s", "/dev/", dev->name); if (num > 0) fd = open(path, O_RDONLY, 0); if (num <= 0 || !is_fd_valid(fd)) { @@ -7550,13 +7568,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, return 0; } - /* count total number of extents for merge */ - i = 0; - for (dl = super->disks; dl; dl = dl->next) - if (dl->e) - i += dl->extent_cnt; - - maxsize = merge_extents(super, i); + maxsize = merge_extents(super, false); if (mpb->num_raid_devs > 0 && size && size != maxsize) pr_err("attempting to create a second volume with size less then remaining space.\n"); @@ -7591,7 +7603,7 @@ static int validate_geometry_imsm_volume(struct supertype *st, int level, * @super: &intel_super pointer, not NULL. * @raiddisks: number of raid disks. * @size: requested size, could be 0 (means max size). - * @chunk: requested chunk. + * @chunk: requested chunk size in KiB. * @freesize: pointer for returned size value. * * Return: &IMSM_STATUS_OK or &IMSM_STATUS_ERROR. @@ -7605,22 +7617,22 @@ static imsm_status_t imsm_get_free_size(struct intel_super *super, const int raiddisks, unsigned long long size, const int chunk, - unsigned long long *freesize) + unsigned long long *freesize, + bool expanding) { struct imsm_super *mpb = super->anchor; struct dl *dl; int i; - int extent_cnt; struct extent *e; + int cnt = 0; + int used = 0; unsigned long long maxsize; - unsigned long long minsize; - int cnt; - int used; + unsigned long long minsize = size; + + if (minsize == 0) + minsize = chunk * 2; /* find the largest common start free region of the possible disks */ - used = 0; - extent_cnt = 0; - cnt = 0; for (dl = super->disks; dl; dl = dl->next) { dl->raiddisk = -1; @@ -7640,19 +7652,18 @@ static imsm_status_t imsm_get_free_size(struct intel_super *super, ; dl->e = e; dl->extent_cnt = i; - extent_cnt += i; cnt++; } - maxsize = merge_extents(super, extent_cnt); - minsize = size; - if (size == 0) - /* chunk is in K */ - minsize = chunk * 2; + maxsize = merge_extents(super, expanding); + if (maxsize < minsize) { + pr_err("imsm: Free space is %llu but must be equal or larger than %llu.\n", + maxsize, minsize); + return IMSM_STATUS_ERROR; + } - if (cnt < raiddisks || (super->orom && used && used != raiddisks) || - maxsize < minsize || maxsize == 0) { - pr_err("not enough devices with space to create array.\n"); + if (cnt < raiddisks || (super->orom && used && used != raiddisks)) { + pr_err("imsm: Not enough devices with space to create array.\n"); return IMSM_STATUS_ERROR; } @@ -7702,7 +7713,7 @@ static imsm_status_t autolayout_imsm(struct intel_super *super, int vol_cnt = super->anchor->num_raid_devs; imsm_status_t rv; - rv = imsm_get_free_size(super, raiddisks, size, chunk, freesize); + rv = imsm_get_free_size(super, raiddisks, size, chunk, freesize, false); if (rv != IMSM_STATUS_OK) return IMSM_STATUS_ERROR; @@ -7897,7 +7908,7 @@ static int kill_subarray_imsm(struct supertype *st, char *subarray_id) if (i < current_vol) continue; - sprintf(subarray, "%u", i); + snprintf(subarray, sizeof(subarray), "%u", i); if (is_subarray_active(subarray, st->devnm)) { pr_err("deleting subarray-%d would change the UUID of active subarray-%d, aborting\n", current_vol, i); @@ -7975,7 +7986,7 @@ static int update_subarray_imsm(struct supertype *st, char *subarray, char *ep; int vol; - if (!check_name(super, name, 0)) + if (imsm_is_name_allowed(super, name, 1) == false) return 2; vol = strtoul(subarray, &ep, 10); @@ -10302,7 +10313,8 @@ static void imsm_process_update(struct supertype *st, if (a->info.container_member == target) break; dev = get_imsm_dev(super, u->dev_idx); - if (a || !check_name(super, name, 1)) { + + if (a || !dev || imsm_is_name_allowed(super, name, 0) == false) { dprintf("failed to rename subarray-%d\n", target); break; } @@ -11270,7 +11282,7 @@ static const char *imsm_get_disk_controller_domain(const char *path) char *drv=NULL; struct stat st; - strcpy(disk_path, disk_by_path); + strncpy(disk_path, disk_by_path, PATH_MAX); strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1); if (stat(disk_path, &st) == 0) { struct sys_dev* hba; @@ -11624,6 +11636,96 @@ static void imsm_update_metadata_locally(struct supertype *st, } } +/** + * imsm_analyze_expand() - check expand properties and calculate new size. + * @st: imsm supertype. + * @geo: new geometry params. + * @array: array info. + * @direction: reshape direction. + * + * Obtain free space after the &array and verify if expand to requested size is + * possible. If geo->size is set to %MAX_SIZE, assume that max free size is + * requested. + * + * Return: + * On success %IMSM_STATUS_OK is returned, geo->size and geo->raid_disks are + * updated. + * On error, %IMSM_STATUS_ERROR is returned. + */ +static imsm_status_t imsm_analyze_expand(struct supertype *st, + struct geo_params *geo, + struct mdinfo *array, + int direction) +{ + struct intel_super *super = st->sb; + struct imsm_dev *dev = get_imsm_dev(super, super->current_vol); + struct imsm_map *map = get_imsm_map(dev, MAP_0); + int data_disks = imsm_num_data_members(map); + + unsigned long long current_size; + unsigned long long free_size; + unsigned long long new_size; + unsigned long long max_size; + + const int chunk_kib = geo->chunksize / 1024; + imsm_status_t rv; + + if (direction == ROLLBACK_METADATA_CHANGES) { + /** + * Accept size for rollback only. + */ + new_size = geo->size * 2; + goto success; + } + + if (data_disks == 0) { + pr_err("imsm: Cannot retrieve data disks.\n"); + return IMSM_STATUS_ERROR; + } + current_size = array->custom_array_size / data_disks; + + rv = imsm_get_free_size(super, dev->vol.map->num_members, 0, chunk_kib, &free_size, true); + if (rv != IMSM_STATUS_OK) { + pr_err("imsm: Cannot find free space for expand.\n"); + return IMSM_STATUS_ERROR; + } + max_size = round_member_size_to_mb(free_size + current_size); + + if (geo->size == MAX_SIZE) + new_size = max_size; + else + new_size = round_member_size_to_mb(geo->size * 2); + + if (new_size == 0) { + pr_err("imsm: Rounded requested size is 0.\n"); + return IMSM_STATUS_ERROR; + } + + if (new_size > max_size) { + pr_err("imsm: Rounded requested size (%llu) is larger than free space available (%llu).\n", + new_size, max_size); + return IMSM_STATUS_ERROR; + } + + if (new_size == current_size) { + pr_err("imsm: Rounded requested size (%llu) is same as current size (%llu).\n", + new_size, current_size); + return IMSM_STATUS_ERROR; + } + + if (new_size < current_size) { + pr_err("imsm: Size reduction is not supported, rounded requested size (%llu) is smaller than current (%llu).\n", + new_size, current_size); + return IMSM_STATUS_ERROR; + } + +success: + dprintf("imsm: New size per member is %llu.\n", new_size); + geo->size = data_disks * new_size; + geo->raid_disks = dev->vol.map->num_members; + return IMSM_STATUS_OK; +} + /*************************************************************************** * Function: imsm_analyze_change * Description: Function analyze change for single volume @@ -11644,13 +11746,6 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, int devNumChange = 0; /* imsm compatible layout value for array geometry verification */ int imsm_layout = -1; - int data_disks; - struct imsm_dev *dev; - struct imsm_map *map; - struct intel_super *super; - unsigned long long current_size; - unsigned long long free_size; - unsigned long long max_size; imsm_status_t rv; getinfo_super_imsm_volume(st, &info, NULL); @@ -11733,95 +11828,20 @@ enum imsm_reshape_type imsm_analyze_change(struct supertype *st, geo->chunksize = info.array.chunk_size; } - chunk = geo->chunksize / 1024; - - super = st->sb; - dev = get_imsm_dev(super, super->current_vol); - map = get_imsm_map(dev, MAP_0); - data_disks = imsm_num_data_members(map); - /* compute current size per disk member - */ - current_size = info.custom_array_size / data_disks; - - if (geo->size > 0 && geo->size != MAX_SIZE) { - /* align component size - */ - geo->size = imsm_component_size_alignment_check( - get_imsm_raid_level(dev->vol.map), - chunk * 1024, super->sector_size, - geo->size * 2); - if (geo->size == 0) { - pr_err("Error. Size expansion is supported only (current size is %llu, requested size /rounded/ is 0).\n", - current_size); - goto analyse_change_exit; - } - } - - if (current_size != geo->size && geo->size > 0) { + if (geo->size > 0) { if (change != -1) { pr_err("Error. Size change should be the only one at a time.\n"); change = -1; goto analyse_change_exit; } - if ((super->current_vol + 1) != super->anchor->num_raid_devs) { - pr_err("Error. The last volume in container can be expanded only (%i/%s).\n", - super->current_vol, st->devnm); - goto analyse_change_exit; - } - /* check the maximum available size - */ - rv = imsm_get_free_size(super, dev->vol.map->num_members, - 0, chunk, &free_size); + rv = imsm_analyze_expand(st, geo, &info, direction); if (rv != IMSM_STATUS_OK) - /* Cannot find maximum available space - */ - max_size = 0; - else { - max_size = free_size + current_size; - /* align component size - */ - max_size = imsm_component_size_alignment_check( - get_imsm_raid_level(dev->vol.map), - chunk * 1024, super->sector_size, - max_size); - } - if (geo->size == MAX_SIZE) { - /* requested size change to the maximum available size - */ - if (max_size == 0) { - pr_err("Error. Cannot find maximum available space.\n"); - change = -1; - goto analyse_change_exit; - } else - geo->size = max_size; - } - - if (direction == ROLLBACK_METADATA_CHANGES) { - /* accept size for rollback only - */ - } else { - /* round size due to metadata compatibility - */ - geo->size = (geo->size >> SECT_PER_MB_SHIFT) - << SECT_PER_MB_SHIFT; - dprintf("Prepare update for size change to %llu\n", - geo->size ); - if (current_size >= geo->size) { - pr_err("Error. Size expansion is supported only (current size is %llu, requested size /rounded/ is %llu).\n", - current_size, geo->size); - goto analyse_change_exit; - } - if (max_size && geo->size > max_size) { - pr_err("Error. Requested size is larger than maximum available size (maximum available size is %llu, requested size /rounded/ is %llu).\n", - max_size, geo->size); - goto analyse_change_exit; - } - } - geo->size *= data_disks; - geo->raid_disks = dev->vol.map->num_members; + goto analyse_change_exit; change = CH_ARRAY_SIZE; } + + chunk = geo->chunksize / 1024; if (!validate_geometry_imsm(st, geo->level, imsm_layout, @@ -1967,6 +1967,14 @@ fail_to_write: return 1; } +static bool has_raid0_layout(struct mdp_superblock_1 *sb) +{ + if (sb->level == 0 && sb->layout != 0) + return true; + else + return false; +} + static int write_init_super1(struct supertype *st) { struct mdp_superblock_1 *sb = st->sb; @@ -1978,12 +1986,17 @@ static int write_init_super1(struct supertype *st) unsigned long long sb_offset; unsigned long long data_offset; long bm_offset; - int raid0_need_layout = 0; + bool raid0_need_layout = false; + + /* Since linux kernel v5.4, raid0 always has a layout */ + if (has_raid0_layout(sb) && get_linux_version() >= 5004000) + raid0_need_layout = true; for (di = st->info; di; di = di->next) { if (di->disk.state & (1 << MD_DISK_JOURNAL)) sb->feature_map |= __cpu_to_le32(MD_FEATURE_JOURNAL); - if (sb->level == 0 && sb->layout != 0) { + if (has_raid0_layout(sb) && !raid0_need_layout) { + struct devinfo *di2 = st->info; unsigned long long s1, s2; s1 = di->dev_size; @@ -1995,7 +2008,7 @@ static int write_init_super1(struct supertype *st) s2 -= di2->data_offset; s2 /= __le32_to_cpu(sb->chunksize); if (s1 != s2) - raid0_need_layout = 1; + raid0_need_layout = true; } } @@ -107,8 +107,12 @@ do_test() { echo -ne "$_script... " if ( set -ex ; . $_script ) &> $targetdir/log then - dmesg | grep -iq "error\|call trace\|segfault" && - die "dmesg prints errors when testing $_basename!" + if [ -f "${_script}.inject_error" ]; then + echo "dmesg checking is skipped because test inject error" + else + dmesg | grep -iq "error\|call trace\|segfault" && + die "dmesg prints errors when testing $_basename!" + fi echo "succeeded" _fail=0 else diff --git a/tests/00confnames b/tests/00confnames new file mode 100644 index 0000000..10823f0 --- /dev/null +++ b/tests/00confnames @@ -0,0 +1,107 @@ +set -x -e +. tests/templates/names_template + +# Test how <devname> and <name> from config are handled during Incremental assemblation. +# 1-6 <devnode> only tests (no <name> in config). +# 6-10 <devname> and <name> combinations are tested. +# 11-13 corner cases. + +names_create "/dev/md/name" +local _UUID="$(mdadm -D --export /dev/md127 | grep MD_UUID | cut -d'=' -f2)" +[[ "$_UUID" == "" ]] && echo "Cannot obtain UUID for $DEVNODE_NAME" && exit 1 + + +# 1. <devname> definition consistent with metadata name. +names_make_conf $_UUID "/dev/md/name" "empty" $config +mdadm -S "/dev/md127" +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "name" "name" +mdadm -S "/dev/md127" + +# 2. Same as 1, but use short name form of <devname>. +names_make_conf $_UUID "name" "empty" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "name" "name" +mdadm -S "/dev/md127" + +# 3. Same as 1, but use different <devname> than metadata provides. +names_make_conf $_UUID "/dev/md/other" "empty" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "other" "name" +mdadm -S "/dev/md127" + +# 4. Same as 3, but use short name form of <devname>. +names_make_conf $_UUID "other" "empty" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "other" "name" +mdadm -S "/dev/md127" + +# 5. Force particular node creation by setting <devname> to /dev/mdX. Link is not created in this +# case. +names_make_conf $_UUID "/dev/md4" "empty" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md4" "empty" "name" +mdadm -S "/dev/md4" + +# 6. <devname> set to /dev/mdX, <name> same as in metadata. +# Metadata name and default node used - controversial. Current behavior documented. +names_make_conf $_UUID "/dev/md22" "name" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "name" "name" +mdadm -S "/dev/md127" + +# 7. <devname> set to /dev/mdX, <name> different than in metadata. +# Metadata name and default node used - controversial. Current behavior documented. +names_make_conf $_UUID "/dev/md8" "other" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "name" "name" +mdadm -S "/dev/md127" + +# 8. Both <devname> and <name> different than in metadata. +# Metadata name and default node used - controversial. Current behavior documented. +names_make_conf $_UUID "devnode" "other_name" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "name" "name" +mdadm -S "/dev/md127" + +# 9. <devname> set to metadata name, <name> different than in metadata. +# Metadata name and default node used - controversial. Current behavior documented. +names_make_conf $_UUID "name" "other_name" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "name" "name" +mdadm -S "/dev/md127" + +# 10. Bad <devname> set, no <name>. +# Metadata name and default node used - expected. +names_make_conf $_UUID "/im/bad/devname" "empty" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "name" "name" +mdadm -S "/dev/md127" + +# 11. <devname> with some special symbols and locales, no <name>. +# <devname> should be ignored. +names_make_conf $_UUID "tźż-\.,<>st+-" "empty" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "name" "name" +mdadm -S "/dev/md127" + +# 12. No <devname> and <name> set. +# Metadata name and default node used - expected. +names_make_conf $_UUID "empty" "empty" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "name" "name" +mdadm -S "/dev/md127" + +# 13. No <devname>, <name> set to /dev/mdX. +# Entry should be ignored, it is not ignored but result is good anyway. +names_make_conf $_UUID "empty" "/dev/md12" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "name" "name" +mdadm -S "/dev/md127" + +# 13. No <devname>, <name> with special symbols and locales. +# Entry should be ignored, it is not ignored but result is good anyway. +names_make_conf $_UUID "empty" "./\śćń#&" $config +mdadm -I $dev0 --config=$config +names_verify "/dev/md127" "name" "name" +mdadm -S "/dev/md127" diff --git a/tests/00createnames b/tests/00createnames index 64b81b9..a95e7d2 100644 --- a/tests/00createnames +++ b/tests/00createnames @@ -1,93 +1,44 @@ set -x -e +. tests/templates/names_template # Test how <devname> and --name= are handled for create mode. -# We need to check three properties, generated from those parameters: -# - devnode name -# - link in /dev/md/ (MD_DEVNAME property from --detail --export) -# - name in metadata (MD_NAME property from --examine --export) - -function _verify() { - local DEVNODE_NAME="$1" - local WANTED_LINK="$2" - local WANTED_NAME="$3" - - local RES="$(mdadm -D --export $DEVNODE_NAME | grep MD_DEVNAME)" - if [[ "$?" != "0" ]]; then - echo "Cannot get details for $DEVNODE_NAME - unexpected devnode." - exit 1 - fi - - if [[ "$WANTED_LINK" != "empty" ]]; then - local EXPECTED="MD_DEVNAME=$WANTED_LINK" - if [[ "$RES" != "$EXPECTED" ]]; then - echo "$RES doesn't match $EXPECTED." - exit 1 - fi - fi - - - local RES="$(mdadm -E --export $dev0 | grep MD_NAME)" - if [[ "$?" != "0" ]]; then - echo "Cannot get metadata from $dev0." - exit 1 - fi - - local EXPECTED="MD_NAME=$(hostname):$WANTED_NAME" - if [[ "$RES" != "$EXPECTED" ]]; then - echo "$RES doesn't match $EXPECTED." - exit 1 - fi -} - -function _create() { - local DEVNAME=$1 - local NAME=$2 - - if [[ -z "$NAME" ]]; then - mdadm -CR "$DEVNAME" -l0 -n 1 $dev0 --force - else - mdadm -CR "$DEVNAME" --name="$NAME" -l0 -n 1 $dev0 --force - fi - - if [[ "$?" != "0" ]]; then - echo "Cannot create device." - exit 1 - fi -} # The most trivial case. -_create "/dev/md/name" -_verify "/dev/md127" "name" "name" +names_create "/dev/md/name" +names_verify "/dev/md127" "name" "name" mdadm -S "/dev/md127" -_create "name" -_verify "/dev/md127" "name" "name" +names_create "name" +names_verify "/dev/md127" "name" "name" mdadm -S "/dev/md127" # Use 'mdX' as name. -_create "/dev/md/md0" -_verify "/dev/md127" "md0" "md0" +names_create "/dev/md/md0" +names_verify "/dev/md127" "md0" "md0" mdadm -S "/dev/md127" -_create "md0" -_verify "/dev/md127" "md0" "md0" +names_create "md0" +names_verify "/dev/md127" "md0" "md0" mdadm -S "/dev/md127" # <devnode> is used to create MD_DEVNAME but, name is used to create MD_NAME. -_create "/dev/md/devnode" "name" -_verify "/dev/md127" "devnode" "name" +names_create "/dev/md/devnode" "name" +names_verify "/dev/md127" "devnode" "name" mdadm -S "/dev/md127" -_create "devnode" "name" -_verify "/dev/md127" "devnode" "name" +names_create "devnode" "name" +names_verify "/dev/md127" "devnode" "name" mdadm -S "/dev/md127" # Devnode points to /dev/ directory. MD_DEVNAME doesn't exist. -_create "/dev/md0" -_verify "/dev/md0" "empty" "0" +names_create "/dev/md0" +names_verify "/dev/md0" "empty" "0" mdadm -S "/dev/md0" # Devnode points to /dev/ directory and name is set. -_create "/dev/md0" "name" -_verify "/dev/md0" "empty" "name" +names_create "/dev/md0" "name" +names_verify "/dev/md0" "empty" "name" mdadm -S "/dev/md0" + +# Devnode is a special ignore keyword. Should be rejected. +names_create "<ignore>" "name", "true" diff --git a/tests/06name b/tests/06name index 4d5e824..86eaab6 100644 --- a/tests/06name +++ b/tests/06name @@ -3,8 +3,8 @@ set -x # create an array with a name mdadm -CR $md0 -l0 -n2 --metadata=1 --name="Fred" $dev0 $dev1 -mdadm -E $dev0 | grep 'Name : [^:]*:Fred ' > /dev/null || exit 1 -mdadm -D $md0 | grep 'Name : [^:]*:Fred ' > /dev/null || exit 1 +mdadm -E $dev0 | grep 'Name : Fred' > /dev/null || exit 1 +mdadm -D $md0 | grep 'Name : Fred' > /dev/null || exit 1 mdadm -S $md0 mdadm -A $md0 --name="Fred" $devlist diff --git a/tests/23rdev-lifetime b/tests/23rdev-lifetime new file mode 100644 index 0000000..1750b0d --- /dev/null +++ b/tests/23rdev-lifetime @@ -0,0 +1,34 @@ +devname=${dev0##*/} +devt=`cat /sys/block/$devname/dev` +pid="" +runtime=2 + +clean_up_test() { + pill -9 $pid + echo clear > /sys/block/md0/md/array_state +} + +trap 'clean_up_test' EXIT + +add_by_sysfs() { + while true; do + echo $devt > /sys/block/md0/md/new_dev + done +} + +remove_by_sysfs(){ + while true; do + echo remove > /sys/block/md0/md/dev-${devname}/state + done +} + +echo md0 > /sys/module/md_mod/parameters/new_array || die "create md0 failed" + +add_by_sysfs & +pid="$pid $!" + +remove_by_sysfs & +pid="$pid $!" + +sleep $runtime +exit 0 diff --git a/tests/24raid10deadlock b/tests/24raid10deadlock new file mode 100644 index 0000000..ee330aa --- /dev/null +++ b/tests/24raid10deadlock @@ -0,0 +1,88 @@ +devs="$dev0 $dev1 $dev2 $dev3" +runtime=120 +pid="" +action_pid="" + +set_up_injection() +{ + echo -1 > /sys/kernel/debug/fail_make_request/times + echo 1 > /sys/kernel/debug/fail_make_request/probability + echo 0 > /sys/kernel/debug/fail_make_request/verbose + echo 1 > /sys/block/${1##*/}/make-it-fail +} + +clean_up_injection() +{ + echo 0 > /sys/block/${1##*/}/make-it-fail + echo 0 > /sys/kernel/debug/fail_make_request/times + echo 0 > /sys/kernel/debug/fail_make_request/probability + echo 2 > /sys/kernel/debug/fail_make_request/verbose +} + +test_rdev() +{ + while true; do + mdadm -f $md0 $1 &> /dev/null + mdadm -r $md0 $1 &> /dev/null + mdadm --zero-superblock $1 &> /dev/null + mdadm -a $md0 $1 &> /dev/null + sleep $2 + done +} + +test_write_action() +{ + while true; do + echo frozen > /sys/block/md0/md/sync_action + echo idle > /sys/block/md0/md/sync_action + sleep 0.1 + done +} + +set_up_test() +{ + fio -h &> /dev/null || die "fio not found" + + # create a simple raid10 + mdadm -Cv -R -n 4 -l10 $md0 $devs || die "create raid10 failed" +} + +clean_up_test() +{ + clean_up_injection $dev0 + pkill -9 fio + kill -9 $pid + kill -9 $action_pid + + sleep 1 + + if ps $action_pid | tail -1 | awk '{print $3}' | grep D; then + die "thread that is writing sysfs is stuck in D state, deadlock is triggered" + fi + mdadm -S $md0 +} + +cat /sys/kernel/debug/fail_make_request/times || die "fault injection is not enabled" + +trap 'clean_up_test' EXIT + +set_up_test || die "set up test failed" + +# backgroup io pressure +fio -filename=$md0 -rw=randwrite -direct=1 -name=test -bs=4k -numjobs=16 -iodepth=16 & + +# trigger add/remove device by io failure +set_up_injection $dev0 +test_rdev $dev0 2 & +pid="$pid $!" + +# add/remove device directly +test_rdev $dev3 10 & +pid="$pid $!" + +test_write_action & +action_pid="$!" + +sleep $runtime + +exit 0 diff --git a/tests/24raid10deadlock.inject_error b/tests/24raid10deadlock.inject_error new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tests/24raid10deadlock.inject_error diff --git a/tests/24raid456deadlock b/tests/24raid456deadlock new file mode 100644 index 0000000..80e6e97 --- /dev/null +++ b/tests/24raid456deadlock @@ -0,0 +1,58 @@ +devs="$dev0 $dev1 $dev2 $dev3 $dev4 $dev5" +runtime=120 +pid="" +old=`cat /proc/sys/vm/dirty_background_ratio` + +test_write_action() +{ + while true; do + echo check > /sys/block/md0/md/sync_action &> /dev/null + sleep 0.1 + echo idle > /sys/block/md0/md/sync_action &> /dev/null + done +} + +test_write_back() +{ + fio -filename=$md0 -bs=4k -rw=write -numjobs=1 -name=test \ + -time_based -runtime=$runtime &> /dev/null +} + +set_up_test() +{ + fio -h &> /dev/null || die "fio not found" + + # create a simple raid6 + mdadm -Cv -R -n 6 -l6 $md0 $devs --assume-clean || die "create raid6 failed" + + # trigger dirty pages write back + echo 0 > /proc/sys/vm/dirty_background_ratio +} + +clean_up_test() +{ + echo $old > /proc/sys/vm/dirty_background_ratio + + pkill -9 fio + kill -9 $pid + + sleep 1 + + if ps $pid | tail -1 | awk '{print $3}' | grep D; then + die "thread that is writing sysfs is stuck in D state, deadlock is triggered" + fi + mdadm -S $md0 +} + +trap 'clean_up_test' EXIT + +set_up_test || die "set up test failed" + +test_write_back & + +test_write_action & +pid="$!" + +sleep $runtime + +exit 0 diff --git a/tests/25raid456-recovery-while-reshape b/tests/25raid456-recovery-while-reshape new file mode 100644 index 0000000..3f6251b --- /dev/null +++ b/tests/25raid456-recovery-while-reshape @@ -0,0 +1,33 @@ +devs="$dev0 $dev1 $dev2" + +set_up_test() +{ + mdadm -Cv -R -n 3 -l5 $md0 $devs --assume-clean --size=50M || die "create array failed" + mdadm -a $md0 $dev3 $dev4 || die "failed to bind new disk to array" + echo 1000 > /sys/block/md0/md/sync_speed_max +} + +clean_up_test() +{ + mdadm -S $md0 +} + +trap 'clean_up_test' EXIT + +set_up_test || die "set up test failed" + +# trigger reshape +mdadm --grow -l 6 $md0 +sleep 1 + +# set up replacement +echo frozen > /sys/block/md0/md/sync_action +echo want_replacement > /sys/block/md0/md/rd0/state +echo reshape > /sys/block/md0/md/sync_action +sleep 1 + +# reassemeble array +mdadm -S $md0 || die "can't stop array" +mdadm --assemble $md0 $devs $dev3 $dev4 || die "can't assemble array" + +exit 0 diff --git a/tests/25raid456-reshape-corrupt-data b/tests/25raid456-reshape-corrupt-data new file mode 100644 index 0000000..fdb875f --- /dev/null +++ b/tests/25raid456-reshape-corrupt-data @@ -0,0 +1,35 @@ +devs="$dev0 $dev1 $dev2" + +set_up_test() +{ + mdadm -Cv -R -n 3 -l5 $md0 $devs --size=50M || die "create array failed" + mdadm -a $md0 $dev3 || die "failed to bind new disk to array" + mkfs.xfs -f $md0 || die "mkfs failed" + xfs_ncheck $md0 || die "check fs failed" +} + +clean_up_test() +{ + mdadm -S $md0 +} + +trap 'clean_up_test' EXIT + +set_up_test || die "set up test failed" + +# trigger reshape +echo 1000 > /sys/block/md0/md/sync_speed_max +mdadm --grow -l 6 $md0 +sleep 1 + +# stop and start reshape +echo frozen > /sys/block/md0/md/sync_action +echo system > /sys/block/md0/md/sync_speed_max +echo reshape > /sys/block/md0/md/sync_action + +mdadm -W $md0 + +# check if data is corrupted +xfs_ncheck $md0 || die "data is corrupted after reshape" + +exit 0 diff --git a/tests/25raid456-reshape-deadlock b/tests/25raid456-reshape-deadlock new file mode 100644 index 0000000..bfa0cc5 --- /dev/null +++ b/tests/25raid456-reshape-deadlock @@ -0,0 +1,34 @@ +devs="$dev0 $dev1 $dev2" + +set_up_test() +{ + mdadm -Cv -R -n 3 -l5 $md0 $devs --size=50M || die "create array failed" + mdadm -a $md0 $dev3 || die "failed to bind new disk to array" + echo 1000 > /sys/block/md0/md/sync_speed_max +} + +clean_up_test() +{ + echo idle > /sys/block/md0/md/sync_action + mdadm -S $md0 +} + +trap 'clean_up_test' EXIT + +set_up_test || die "set up test failed" + +# trigger reshape +mdadm --grow -l 6 $md0 +sleep 1 + +# stop reshape +echo frozen > /sys/block/md0/md/sync_action + +# read accross reshape +dd if=$md0 of=/dev/NULL bs=1m count=100 iflag=direct &> /dev/null & +sleep 2 + +# suspend array +echo 1 > /sys/block/md0/md/suspend_lo + +exit 0 diff --git a/tests/25raid456-reshape-while-recovery b/tests/25raid456-reshape-while-recovery new file mode 100644 index 0000000..b9f871f --- /dev/null +++ b/tests/25raid456-reshape-while-recovery @@ -0,0 +1,32 @@ +devs="$dev0 $dev1 $dev2" + +set_up_test() +{ + mdadm -Cv -R -n 3 -l5 $md0 $devs --assume-clean --size=50M || die "create array failed" + mdadm -a $md0 $dev3 $dev4 || die "failed to bind new disk to array" + echo 1000 > /sys/block/md0/md/sync_speed_max +} + +clean_up_test() +{ + mdadm -S $md0 +} + +trap 'clean_up_test' EXIT + +set_up_test || die "set up test failed" + +# set up replacement +echo want_replacement > /sys/block/md0/md/rd0/state +sleep 1 + +# trigger reshape +echo frozen > /sys/block/md0/md/sync_action +mdadm --grow -l 6 $md0 +sleep 1 + +# reassemeble array +mdadm -S $md0 || die "can't stop array" +mdadm --assemble $md0 $devs $dev3 $dev4 || die "can't assemble array" + +exit 0 diff --git a/tests/func.sh b/tests/func.sh index 9710a53..5053b01 100644 --- a/tests/func.sh +++ b/tests/func.sh @@ -170,7 +170,6 @@ do_setup() { dd if=/dev/zero of=$targetdir/mdtest$d count=$sz bs=1K > /dev/null 2>&1 # make sure udev doesn't touch mdadm --zero $targetdir/mdtest$d 2> /dev/null - [ -b /dev/loop$d ] || mknod /dev/loop$d b 7 $d if [ $d -eq 7 ] then losetup /dev/loop$d $targetdir/mdtest6 # for multipath use diff --git a/tests/templates/names_template b/tests/templates/names_template new file mode 100644 index 0000000..6181bfa --- /dev/null +++ b/tests/templates/names_template @@ -0,0 +1,80 @@ +# NAME is optional. Testing with native 1.2 superblock. +function names_create() { + local DEVNAME=$1 + local NAME=$2 + local NEG_TEST=$3 + + if [[ -z "$NAME" ]]; then + mdadm -CR "$DEVNAME" -l0 -n 1 $dev0 --force + else + mdadm -CR "$DEVNAME" --name="$NAME" --metadata=1.2 -l0 -n 1 $dev0 --force + fi + + if [[ "$NEG_TEST" == "true" ]]; then + [[ "$?" == "0" ]] && return 0 + echo "Negative verification failed" + exit 1 + fi + + if [[ "$?" != "0" ]]; then + echo "Cannot create device." + exit 1 + fi +} + +# Three properties to check: +# - devnode name +# - link in /dev/md/ (MD_DEVNAME property from --detail --export) +# - name in metadata (MD_NAME property from --detail --export)- that works only with 1.2 sb. +function names_verify() { + local DEVNODE_NAME="$1" + local WANTED_LINK="$2" + local WANTED_NAME="$3" + + local RES="$(mdadm -D --export $DEVNODE_NAME | grep MD_DEVNAME)" + if [[ "$?" != "0" ]]; then + echo "Cannot get details for $DEVNODE_NAME - unexpected devnode." + exit 1 + fi + + if [[ "$WANTED_LINK" != "empty" ]]; then + local EXPECTED="MD_DEVNAME=$WANTED_LINK" + fi + + if [[ "$RES" != "$EXPECTED" ]]; then + echo "$RES doesn't match $EXPECTED." + exit 1 + fi + + local RES="$(mdadm -D --export $DEVNODE_NAME | grep MD_NAME)" + if [[ "$?" != "0" ]]; then + echo "Cannot get metadata from $dev0." + exit 1 + fi + + local EXPECTED="MD_NAME=$(hostname):$WANTED_NAME" + if [[ "$RES" != "$EXPECTED" ]]; then + echo "$RES doesn't match $EXPECTED." + exit 1 + fi +} + +# Generate ARRAYLINE for tested array. +names_make_conf() { + local UUID="$1" + local WANTED_DEVNAME="$2" + local WANTED_NAME="$3" + local CONF="$4" + + local LINE="ARRAY metadata=1.2 UUID=$UUID" + + if [[ "$WANTED_DEVNAME" != "empty" ]]; then + LINE="$LINE $WANTED_DEVNAME" + fi + + if [[ "$WANTED_NAME" != "empty" ]]; then + LINE="$LINE name=$WANTED_NAME" + fi + + echo $LINE > $CONF +} |